-
-
Notifications
You must be signed in to change notification settings - Fork 33.7k
bpo-35805: Add parser for Message-ID email header. #13397
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
This parser is based on the definition of Identification Fields from RFC 5322 Sec 3.6.4. This should also prevent folding of Message-ID header using RFC 2047 encoded words and hence fix bpo-35805.
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -506,6 +506,12 @@ class DotAtomText(TokenList): | |
| as_ew_allowed = True | ||
|
|
||
|
|
||
class NoFoldLiteral(TokenList):
    """Token list for the no-fold-literal form of a msg-id's id-right."""

    # Never emit this token as an RFC 2047 encoded word.
    as_ew_allowed = False
    token_type = 'no-fold-literal'
|
|
||
|
|
||
| class AddrSpec(TokenList): | ||
|
|
||
| token_type = 'addr-spec' | ||
|
|
@@ -836,6 +842,17 @@ class HeaderLabel(TokenList): | |
| as_ew_allowed = False | ||
|
|
||
|
|
||
class MsgID(TokenList):
    """Token list for a single msg-id ("<" id-left "@" id-right ">")."""

    # Never emit this token as an RFC 2047 encoded word.
    as_ew_allowed = False
    token_type = 'msg-id'
|
|
||
|
|
||
class MessageID(MsgID):
    """Token list for the value of a Message-ID header.

    Inherits as_ew_allowed = False from MsgID; only the token type differs.
    """

    token_type = 'message-id'
|
|
||
|
|
||
| class Header(TokenList): | ||
|
|
||
| token_type = 'header' | ||
|
|
@@ -1583,7 +1600,7 @@ def get_addr_spec(value): | |
| addr_spec.append(token) | ||
| if not value or value[0] != '@': | ||
| addr_spec.defects.append(errors.InvalidHeaderDefect( | ||
| "add-spec local part with no domain")) | ||
| "addr-spec local part with no domain")) | ||
| return addr_spec, value | ||
| addr_spec.append(ValueTerminal('@', 'address-at-symbol')) | ||
| token, value = get_domain(value[1:]) | ||
|
|
@@ -1968,6 +1985,110 @@ def get_address_list(value): | |
| value = value[1:] | ||
| return address_list, value | ||
|
|
||
|
|
||
def get_no_fold_literal(value):
    """no-fold-literal = "[" *dtext "]"

    Parse a no-fold-literal from the start of value and return a
    (NoFoldLiteral, remaining_value) tuple.

    Raise HeaderParseError if value is empty, does not begin with '[',
    or has no ']' directly after the dtext.
    """
    if not value:
        raise errors.HeaderParseError(
            "expected no-fold-literal but found '{}'".format(value))
    if value[0] != '[':
        raise errors.HeaderParseError(
            "expected '[' at the start of no-fold-literal "
            "but found '{}'".format(value))
    literal = NoFoldLiteral()
    literal.append(ValueTerminal('[', 'no-fold-literal-start'))
    dtext, rest = get_dtext(value[1:])
    literal.append(dtext)
    # rest[:1] is '' when rest is empty, so a single comparison covers both
    # "nothing left" and "wrong closing character".
    if rest[:1] != ']':
        raise errors.HeaderParseError(
            "expected ']' at the end of no-fold-literal "
            "but found '{}'".format(rest))
    literal.append(ValueTerminal(']', 'no-fold-literal-end'))
    return literal, rest[1:]
|
|
||
def get_msg_id(value):
    """msg-id = [CFWS] "<" id-left '@' id-right ">" [CFWS]
       id-left = dot-atom-text / obs-id-left
       id-right = dot-atom-text / no-fold-literal / obs-id-right
       no-fold-literal = "[" *dtext "]"

    Parse a msg-id from the start of value and return a
    (MsgID, remaining_value) tuple.

    Obsolete id-left and id-right forms are accepted and recorded on the
    result as ObsoleteHeaderDefect.  A missing id-right, or a missing
    closing '>', is recorded as InvalidHeaderDefect instead of raising.

    Raise HeaderParseError if value (after optional leading CFWS) is empty
    or does not start with '<', or if id-left or id-right cannot be parsed
    in any of the accepted forms.
    """
    msg_id = MsgID()
    # Test for emptiness before indexing: an empty value must reach the
    # HeaderParseError below rather than fail with IndexError.
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        msg_id.append(token)
    if not value or value[0] != '<':
        raise errors.HeaderParseError(
            "expected msg-id but found '{}'".format(value))
    msg_id.append(ValueTerminal('<', 'msg-id-start'))
    value = value[1:]
    # Parse id-left.
    try:
        token, value = get_dot_atom_text(value)
    except errors.HeaderParseError:
        try:
            # obs-id-left is same as local-part of addr-spec.
            token, value = get_obs_local_part(value)
            msg_id.defects.append(errors.ObsoleteHeaderDefect(
                "obsolete id-left in msg-id"))
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected dot-atom-text or obs-id-left"
                " but found '{}'".format(value))
    msg_id.append(token)
    if not value or value[0] != '@':
        msg_id.defects.append(errors.InvalidHeaderDefect(
            "msg-id with no id-right"))
        # Even though there is no id-right, if the local part
        # ends with `>` let's just parse it too and return
        # along with the defect.
        if value and value[0] == '>':
            msg_id.append(ValueTerminal('>', 'msg-id-end'))
            value = value[1:]
        return msg_id, value
    msg_id.append(ValueTerminal('@', 'address-at-symbol'))
    value = value[1:]
    # Parse id-right, trying the current forms before the obsolete one.
    try:
        token, value = get_dot_atom_text(value)
    except errors.HeaderParseError:
        try:
            token, value = get_no_fold_literal(value)
        except errors.HeaderParseError:
            try:
                token, value = get_domain(value)
                msg_id.defects.append(errors.ObsoleteHeaderDefect(
                    "obsolete id-right in msg-id"))
            except errors.HeaderParseError:
                raise errors.HeaderParseError(
                    "expected dot-atom-text, no-fold-literal or obs-id-right"
                    " but found '{}'".format(value))
    msg_id.append(token)
    if value and value[0] == '>':
        value = value[1:]
    else:
        msg_id.defects.append(errors.InvalidHeaderDefect(
            "missing trailing '>' on msg-id"))
    # The closing terminal is appended even when it was missing from the
    # input, so the token list always serializes as a bracketed msg-id.
    msg_id.append(ValueTerminal('>', 'msg-id-end'))
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        msg_id.append(token)
    return msg_id, value
|
|
||
|
|
||
def parse_message_id(value):
    """message-id = "Message-ID:" msg-id CRLF

    Parse value as the body of a Message-ID header and return a MessageID
    token list.

    If get_msg_id() raises HeaderParseError, the error is not propagated:
    an InvalidHeaderDefect is recorded and the returned MessageID contains
    no msg-id token.
    """
    message_id = MessageID()
    try:
        token, value = get_msg_id(value)
    except errors.HeaderParseError:
        message_id.defects.append(errors.InvalidHeaderDefect(
            "Expected msg-id but found {!r}".format(value)))
    else:
        # Append only on success.  Appending unconditionally referenced
        # `token` before assignment whenever get_msg_id() raised, which
        # surfaced as UnboundLocalError for callers (see bpo-38708).
        message_id.append(token)
    return message_id
|
|
||
| # | ||
| # XXX: As I begin to add additional header parsers, I'm realizing we probably | ||
| # have two levels of parser routines: the get_XXX methods that get a token in | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2494,6 +2494,78 @@ def test_invalid_content_transfer_encoding(self): | |
| ";foo", ";foo", ";foo", [errors.InvalidHeaderDefect]*3 | ||
| ) | ||
|
|
||
| # get_msg_id | ||
|
|
||
def test_get_msg_id_valid(self):
    # A plain dot-atom msg-id parses with no defects and nothing left over;
    # its string form and value are the same as the input.
    source = "<simeple.local@example.something.com>"
    msg_id = self._test_get_x(
        parser.get_msg_id,
        source,
        source,
        source,
        [],
        '',
        )
    self.assertEqual(msg_id.token_type, 'msg-id')
|
|
||
def test_get_msg_id_obsolete_local(self):
    # A quoted-string id-left is the obsolete form: it is accepted with an
    # ObsoleteHeaderDefect and the value drops the quotes.
    msg_id = self._test_get_x(
        parser.get_msg_id,
        '<"simeple.local"@example.com>',
        '<"simeple.local"@example.com>',
        '<simeple.local@example.com>',
        [errors.ObsoleteHeaderDefect],
        '',
        )
    self.assertEqual(msg_id.token_type, 'msg-id')
|
|
||
def test_get_msg_id_non_folding_literal_domain(self):
    # id-right given as a no-fold-literal ("[" *dtext "]") is valid:
    # no defects, nothing left over, round-trips unchanged.
    source = "<simple.local@[someexamplecom.domain]>"
    no_defects = []
    msg_id = self._test_get_x(
        parser.get_msg_id, source, source, source, no_defects, "")
    self.assertEqual(msg_id.token_type, 'msg-id')
|
|
||
|
|
||
def test_get_msg_id_obsolete_domain_part(self):
    # CFWS inside id-right is only allowed by the obsolete syntax: it is
    # accepted with an ObsoleteHeaderDefect and the comment collapses to a
    # single space in the value.
    msg_id = self._test_get_x(
        parser.get_msg_id,
        "<simplelocal@(old)example.com>",
        "<simplelocal@(old)example.com>",
        "<simplelocal@ example.com>",
        [errors.ObsoleteHeaderDefect],
        ""
        )
    # Check the returned token like the sibling tests do.
    self.assertEqual(msg_id.token_type, 'msg-id')
|
|
||
def test_get_msg_id_no_id_right_part(self):
    # No '@' and no id-right: still parsed, but flagged with an
    # InvalidHeaderDefect.
    source = "<simplelocal>"
    expected_defects = [errors.InvalidHeaderDefect]
    msg_id = self._test_get_x(
        parser.get_msg_id, source, source, source, expected_defects, "")
    self.assertEqual(msg_id.token_type, 'msg-id')
|
|
||
def test_get_msg_id_no_angle_start(self):
    # Without the opening '<' there is no msg-id to recover: hard error.
    self.assertRaises(
        errors.HeaderParseError, parser.get_msg_id, "msgwithnoankle")
|
|
||
def test_get_msg_id_no_angle_end(self):
    # A missing closing '>' is reported as a defect, and the parser adds
    # the bracket back in both the string form and the value.
    source = "<simplelocal@domain"
    repaired = "<simplelocal@domain>"
    expected_defects = [errors.InvalidHeaderDefect]
    msg_id = self._test_get_x(
        parser.get_msg_id, source, repaired, repaired, expected_defects, "")
    self.assertEqual(msg_id.token_type, 'msg-id')
|
|
||
|
|
||
| @parameterize | ||
| class Test_parse_mime_parameters(TestParserMixin, TestEmailBase): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1648,6 +1648,13 @@ def test_fold_overlong_words_using_RFC2047(self): | |
| 'xxxxxxxxxxxxxxxxxxxx=3D=3D-xxx-xx-xx?=\n' | ||
| ' =?utf-8?q?=3E?=\n') | ||
|
|
||
def test_message_id_header_is_not_folded(self):
    # The msg-id is longer than max_line_length, yet it must come out on
    # one line and must not be split into RFC 2047 encoded words.
    msg_id = '<somemessageidlongerthan@maxlinelength.com>'
    h = self.make_header('Message-ID', msg_id)
    self.assertEqual(
        h.fold(policy=policy.default.clone(max_line_length=20)),
        'Message-ID: ' + msg_id + '\n')
maxking marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| if __name__ == '__main__': | ||
| unittest.main() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| Add parser for Message-ID header and add it to default HeaderRegistry. This | ||
| should prevent folding of Message-ID using RFC 2047 encoded words. |
Uh oh!
There was an error while loading. Please reload this page.