Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
bpo-35805: Add parser for Message-ID header.
This parser is based on the definition of Identification Fields from RFC 5322
Sec 3.6.4.

This should also prevent folding of Message-ID header using RFC 2047 encoded
words and hence fix bpo-35805.
  • Loading branch information
maxking committed May 18, 2019
commit 1e3c618cc0f86745ad92982830607d51000d8f09
123 changes: 122 additions & 1 deletion Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,12 @@ class DotAtomText(TokenList):
as_ew_allowed = True


class NoFoldLiteral(TokenList):

token_type = 'no-fold-literal'
as_ew_allowed = False


class AddrSpec(TokenList):

token_type = 'addr-spec'
Expand Down Expand Up @@ -836,6 +842,17 @@ class HeaderLabel(TokenList):
as_ew_allowed = False


class MsgID(TokenList):

token_type = 'msg-id'
as_ew_allowed = False


class MessageID(MsgID):

token_type = 'message-id'


class Header(TokenList):

token_type = 'header'
Expand Down Expand Up @@ -1583,7 +1600,7 @@ def get_addr_spec(value):
addr_spec.append(token)
if not value or value[0] != '@':
addr_spec.defects.append(errors.InvalidHeaderDefect(
"add-spec local part with no domain"))
"addr-spec local part with no domain"))
return addr_spec, value
addr_spec.append(ValueTerminal('@', 'address-at-symbol'))
token, value = get_domain(value[1:])
Expand Down Expand Up @@ -1968,6 +1985,110 @@ def get_address_list(value):
value = value[1:]
return address_list, value


def get_no_fold_literal(value):
""" no-fold-literal = "[" *dtext "]"
"""
no_fold_literal = NoFoldLiteral()
if not value:
raise errors.HeaderParseError(
"expected no-fold-literal but found '{}'".format(value))
if value[0] != '[':
raise errors.HeaderParseError(
"expected '[' at the start of no-fold-literal "
"but found '{}'".format(value))
no_fold_literal.append(ValueTerminal('[', 'no-fold-literal-start'))
value = value[1:]
token, value = get_dtext(value)
no_fold_literal.append(token)
if not value or value[0] != ']':
raise errors.HeaderParseError(
"expected ']' at the end of no-fold-literal "
"but found '{}'".format(value))
no_fold_literal.append(ValueTerminal(']', 'no-fold-literal-end'))
return no_fold_literal, value[1:]

def get_msg_id(value):
"""msg-id = [CFWS] "<" id-left '@' id-right ">" [CFWS]
id-left = dot-atom-text / obs-id-left
id-right = dot-atom-text / no-fold-literal / obs-id-right
no-fold-literal = "[" *dtext "]"
"""
msg_id = MsgID()
if value[0] in CFWS_LEADER:
token, value = get_cfws(value)
msg_id.append(token)
if not value or value[0] != '<':
raise errors.HeaderParseError(
"expected msg-id but found '{}'".format(value))
msg_id.append(ValueTerminal('<', 'msg-id-start'))
value = value[1:]
# Parse id-left.
try:
token, value = get_dot_atom_text(value)
except errors.HeaderParseError:
try:
# obs-id-left is same as local-part of add-spec.
token, value = get_obs_local_part(value)
msg_id.defects.append(errors.ObsoleteHeaderDefect(
"obsolete id-left in msg-id"))
except errors.HeaderParseError:
raise errors.HeaderParseError(
"expected dot-atom-text or obs-id-left"
" but found '{}'".format(value))
msg_id.append(token)
if not value or value[0] != '@':
msg_id.defects.append(errors.InvalidHeaderDefect(
"msg-id with no id-right"))
# Even though there is no id-right, if the local part
# ends with `>` let's just parse it too and return
# along with the defect.
if value and value[0] == '>':
msg_id.append(ValueTerminal('>', 'msg-id-end'))
value = value[1:]
return msg_id, value
msg_id.append(ValueTerminal('@', 'address-at-symbol'))
value = value[1:]
# Parse id-right.
try:
token, value = get_dot_atom_text(value)
except errors.HeaderParseError:
try:
token, value = get_no_fold_literal(value)
except errors.HeaderParseError as e:
try:
token, value = get_domain(value)
msg_id.defects.append(errors.ObsoleteHeaderDefect(
"obsolete id-right in msg-id"))
except errors.HeaderParseError:
raise errors.HeaderParseError(
"expected dot-atom-text, no-fold-literal or obs-id-right"
" but found '{}'".format(value))
msg_id.append(token)
if value and value[0] == '>':
value = value[1:]
else:
msg_id.defects.append(errors.InvalidHeaderDefect(
"missing trailing '>' on msg-id"))
msg_id.append(ValueTerminal('>', 'msg-id-end'))
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
msg_id.append(token)
return msg_id, value


def parse_message_id(value):
"""message-id = "Message-ID:" msg-id CRLF
"""
message_id = MessageID()
try:
token, value = get_msg_id(value)
except errors.HeaderParseError:
message_id.defects.append(errors.InvalidHeaderDefect(
"Expected msg-id but found {!r}".format(value)))
message_id.append(token)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appears to bomb out when building hyper kitty, because token is referenced before assignment, when try block fails.

======================================================================
ERROR: test_long_message_id (hyperkitty.tests.lib.test_incoming.TestAddToList)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "./hyperkitty/tests/lib/test_incoming.py", line 295, in test_long_message_id
    msg["Message-ID"] = "X" * 260
  File "/usr/lib/python3.8/email/message.py", line 409, in __setitem__
    self._headers.append(self.policy.header_store_parse(name, val))
  File "/usr/lib/python3.8/email/policy.py", line 148, in header_store_parse
    return (name, self.header_factory(name, value))
  File "/usr/lib/python3.8/email/headerregistry.py", line 602, in __call__
    return self[name](name, value)
  File "/usr/lib/python3.8/email/headerregistry.py", line 197, in __new__
    cls.parse(value, kwds)
  File "/usr/lib/python3.8/email/headerregistry.py", line 530, in parse
    kwds['parse_tree'] = parse_tree = cls.value_parser(value)
  File "/usr/lib/python3.8/email/_header_value_parser.py", line 2116, in parse_message_id
    message_id.append(token)
UnboundLocalError: local variable 'token' referenced before assignment

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, same for me. Also, accessing value[0] before checking if value is there…

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@surkova Thank you! I think this should be fixed up properly. Opened BPO https://bugs.python.org/issue38708

return message_id

#
# XXX: As I begin to add additional header parsers, I'm realizing we probably
# have two level of parser routines: the get_XXX methods that get a token in
Expand Down
13 changes: 13 additions & 0 deletions Lib/email/headerregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,18 @@ def cte(self):
return self._cte


class MessageIDHeader:

max_count = 1
value_parser = staticmethod(parser.parse_message_id)

@classmethod
def parse(cls, value, kwds):
kwds['parse_tree'] = parse_tree = cls.value_parser(value)
kwds['decoded'] = str(parse_tree)
kwds['defects'].extend(parse_tree.all_defects)


# The header factory #

_default_header_map = {
Expand All @@ -542,6 +554,7 @@ def cte(self):
'content-type': ContentTypeHeader,
'content-disposition': ContentDispositionHeader,
'content-transfer-encoding': ContentTransferEncodingHeader,
'message-id': MessageIDHeader,
}

class HeaderRegistry:
Expand Down
72 changes: 72 additions & 0 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2494,6 +2494,78 @@ def test_invalid_content_transfer_encoding(self):
";foo", ";foo", ";foo", [errors.InvalidHeaderDefect]*3
)

# get_msg_id

def test_get_msg_id_valid(self):
msg_id = self._test_get_x(
parser.get_msg_id,
"<[email protected]>",
"<[email protected]>",
"<[email protected]>",
[],
'',
)
self.assertEqual(msg_id.token_type, 'msg-id')

def test_get_msg_id_obsolete_local(self):
msg_id = self._test_get_x(
parser.get_msg_id,
'<"simeple.local"@example.com>',
'<"simeple.local"@example.com>',
'<[email protected]>',
[errors.ObsoleteHeaderDefect],
'',
)
self.assertEqual(msg_id.token_type, 'msg-id')

def test_get_msg_id_non_folding_literal_domain(self):
msg_id = self._test_get_x(
parser.get_msg_id,
"<simple.local@[someexamplecom.domain]>",
"<simple.local@[someexamplecom.domain]>",
"<simple.local@[someexamplecom.domain]>",
[],
"",
)
self.assertEqual(msg_id.token_type, 'msg-id')


def test_get_msg_id_obsolete_domain_part(self):
msg_id = self._test_get_x(
parser.get_msg_id,
"<simplelocal@(old)example.com>",
"<simplelocal@(old)example.com>",
"<simplelocal@ example.com>",
[errors.ObsoleteHeaderDefect],
""
)

def test_get_msg_id_no_id_right_part(self):
msg_id = self._test_get_x(
parser.get_msg_id,
"<simplelocal>",
"<simplelocal>",
"<simplelocal>",
[errors.InvalidHeaderDefect],
""
)
self.assertEqual(msg_id.token_type, 'msg-id')

def test_get_msg_id_no_angle_start(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_msg_id("msgwithnoankle")

def test_get_msg_id_no_angle_end(self):
msg_id = self._test_get_x(
parser.get_msg_id,
"<simplelocal@domain",
"<simplelocal@domain>",
"<simplelocal@domain>",
[errors.InvalidHeaderDefect],
""
)
self.assertEqual(msg_id.token_type, 'msg-id')


@parameterize
class Test_parse_mime_parameters(TestParserMixin, TestEmailBase):
Expand Down
7 changes: 7 additions & 0 deletions Lib/test/test_email/test_headerregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1648,6 +1648,13 @@ def test_fold_overlong_words_using_RFC2047(self):
'xxxxxxxxxxxxxxxxxxxx=3D=3D-xxx-xx-xx?=\n'
' =?utf-8?q?=3E?=\n')

def test_message_id_header_is_not_folded(self):
h = self.make_header(
'Message-ID',
'<[email protected]>')
self.assertEqual(
h.fold(policy=policy.default.clone(max_line_length=20)),
'Message-ID: <[email protected]>\n')

if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add parser for Message-ID header and add it to default HeaderRegistry. This
should prevent folding of Message-ID using RFC 2048 encoded words.