Browse code

* S3/Utils.py: Reworked XML helpers - remove XMLNS before parsing the input XML to avoid having all Tags prefixed with {XMLNS} by ElementTree.

git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@326 830e0280-6d2a-0410-9c65-932aecc39d9d

Michal Ludvig authored on 2009/01/06 21:00:29
Showing 2 changed files
... ...
@@ -1,3 +1,9 @@
1
+2009-01-07  Michal Ludvig  <michal@logix.cz>
2
+
3
+	* S3/Utils.py: Reworked XML helpers - remove XMLNS before 
4
+	  parsing the input XML to avoid having all Tags prefixed
5
+	  with {XMLNS} by ElementTree.
6
+
1 7
 2009-01-03  Michal Ludvig  <michal@logix.cz>
2 8
 
3 9
 	* s3cmd: Don't fail when neither $HOME nor %USERPROFILE% is set.
... ...
@@ -23,25 +23,7 @@ try:
23 23
 except ImportError:
24 24
 	import elementtree.ElementTree as ET
25 25
 
26
-def stripTagXmlns(xmlns, tag):
27
-	"""
28
-	Returns a function that, given a tag name argument, removes
29
-	eventual ElementTree xmlns from it.
30
-
31
-	Example:
32
-		stripTagXmlns("{myXmlNS}tag") -> "tag"
33
-	"""
34
-	if not xmlns:
35
-		return tag
36
-	return re.sub(xmlns, "", tag)
37
-
38
-def fixupXPath(xmlns, xpath, max = 0):
39
-	if not xmlns:
40
-		return xpath
41
-	retval = re.subn("//", "//%s" % xmlns, xpath, max)[0]
42
-	return retval
43
-
44
-def parseNodes(nodes, xmlns = ""):
26
+def parseNodes(nodes):
45 27
 	## WARNING: Ignores text nodes from mixed xml/text.
46 28
 	## For instance <tag1>some text<tag2>other text</tag2></tag1>
47 29
 	## will ignore the "some text" node
... ...
@@ -49,9 +31,9 @@ def parseNodes(nodes, xmlns = ""):
49 49
 	for node in nodes:
50 50
 		retval_item = {}
51 51
 		for child in node.getchildren():
52
-			name = stripTagXmlns(xmlns, child.tag)
52
+			name = child.tag
53 53
 			if child.getchildren():
54
-				retval_item[name] = parseNodes([child], xmlns)
54
+				retval_item[name] = parseNodes([child])
55 55
 			else:
56 56
 				retval_item[name] = node.findtext(".//%s" % child.tag)
57 57
 		retval.append(retval_item)
... ...
@@ -62,26 +44,36 @@ def getNameSpace(element):
62 62
 		return ""
63 63
 	return re.compile("^(\{[^}]+\})").match(element.tag).groups()[0]
64 64
 
65
+def stripNameSpace(xml):
66
+	"""
67
+	stripNameSpace(xml) -- remove top-level AWS namespace; returns (xml, xmlns)
68
+	"""
69
+	r = re.compile('^(<?[^>]+?>\s?)(<\w+) xmlns=[\'"](http://[^\'"]+)[\'"](.*)', re.MULTILINE)
70
+	xmlns = r.match(xml).groups()[2]
71
+	xml = r.sub("\\1\\2\\4", xml)
72
+	return xml, xmlns
73
+
65 74
 def getTreeFromXml(xml):
75
+	xml, xmlns = stripNameSpace(xml)
66 76
 	tree = ET.fromstring(xml)
67
-	tree.xmlns = getNameSpace(tree)
77
+	tree.attrib['xmlns'] = xmlns
68 78
 	return tree
69 79
 	
70 80
 def getListFromXml(xml, node):
71 81
 	tree = getTreeFromXml(xml)
72
-	nodes = tree.findall('.//%s%s' % (tree.xmlns, node))
73
-	return parseNodes(nodes, tree.xmlns)
82
+	nodes = tree.findall('.//%s' % (node))
83
+	return parseNodes(nodes)
74 84
 	
75 85
 def getTextFromXml(xml, xpath):
76 86
 	tree = getTreeFromXml(xml)
77 87
 	if tree.tag.endswith(xpath):
78 88
 		return tree.text
79 89
 	else:
80
-		return tree.findtext(fixupXPath(tree.xmlns, xpath))
90
+		return tree.findtext(xpath)
81 91
 
82 92
 def getRootTagName(xml):
83 93
 	tree = getTreeFromXml(xml)
84
-	return stripTagXmlns(tree.xmlns, tree.tag)
94
+	return tree.tag
85 95
 
86 96
 def dateS3toPython(date):
87 97
 	date = re.compile("\.\d\d\dZ").sub(".000Z", date)