Skip to content

Commit 86368e9

Browse files
committed
Fix a crash in iterwalk() on trees produced by a parser that previously failed on invalid input (libxml2 can leak empty namespace declarations between failed parser runs).
1 parent 50c2764 commit 86368e9

File tree

3 files changed

+30
-8
lines changed

3 files changed

+30
-8
lines changed

src/lxml/apihelpers.pxi

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node):
246246
while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
247247
c_ns = c_node.nsDef
248248
while c_ns is not NULL:
249-
prefix = funicodeOrNone(c_ns.prefix)
250-
if prefix not in nsmap:
251-
nsmap[prefix] = funicodeOrNone(c_ns.href)
249+
if c_ns.prefix or c_ns.href:
250+
prefix = funicodeOrNone(c_ns.prefix)
251+
if prefix not in nsmap:
252+
nsmap[prefix] = funicodeOrNone(c_ns.href)
252253
c_ns = c_ns.next
253254
c_node = c_node.parent
254255
return nsmap

src/lxml/iterparse.pxi

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node):
420420
count = 0
421421
c_ns = c_node.nsDef
422422
while c_ns is not NULL:
423-
count += 1
423+
count += (c_ns.href is not NULL)
424424
c_ns = c_ns.next
425425
return count
426426

@@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
431431
count = 0
432432
c_ns = c_node.nsDef
433433
while c_ns is not NULL:
434-
ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '',
435-
funicode(c_ns.href))
436-
event_list.append( (u"start-ns", ns_tuple) )
437-
count += 1
434+
if c_ns.href:
435+
ns_tuple = (funicodeOrEmpty(c_ns.prefix),
436+
funicode(c_ns.href))
437+
event_list.append( (u"start-ns", ns_tuple) )
438+
count += 1
438439
c_ns = c_ns.next
439440
return count

src/lxml/tests/test_etree.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1460,6 +1460,26 @@ def test_iterwalk_getiterator(self):
14601460
[1,2,1,4],
14611461
counts)
14621462

1463+
def test_walk_after_parse_failure(self):
1464+
# This used to be an issue because libxml2 can leak empty namespaces
1465+
# between failed parser runs. iterwalk() failed to handle such a tree.
1466+
try:
1467+
etree.XML('''<anot xmlns="1">''')
1468+
except etree.XMLSyntaxError:
1469+
pass
1470+
else:
1471+
assert False, "invalid input did not fail to parse"
1472+
1473+
et = etree.XML('''<root> </root>''')
1474+
try:
1475+
ns = next(etree.iterwalk(et, events=('start-ns',)))
1476+
except StopIteration:
1477+
# This would be the expected result, because there was no namespace
1478+
pass
1479+
else:
1480+
# This is a bug in libxml2
1481+
assert not ns, repr(ns)
1482+
14631483
def test_itertext_comment_pi(self):
14641484
# https://bugs.launchpad.net/lxml/+bug/1844674
14651485
XML = self.etree.XML

0 commit comments

Comments
 (0)