Skip to content

Commit aca404a

Browse files
committed
Prevent folding of non-ascii message-id headers.
Also, remove empty lines from classes that don't have any methods.
1 parent 1e3c618 commit aca404a

File tree

3 files changed

+46
-30
lines changed

3 files changed

+46
-30
lines changed

Doc/library/email.headerregistry.rst

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -321,19 +321,26 @@ variant, :attr:`~.BaseHeader.max_count` is set to 1.
321321

322322
The default mappings are:
323323

324-
:subject: UniqueUnstructuredHeader
325-
:date: UniqueDateHeader
326-
:resent-date: DateHeader
327-
:orig-date: UniqueDateHeader
328-
:sender: UniqueSingleAddressHeader
329-
:resent-sender: SingleAddressHeader
330-
:to: UniqueAddressHeader
331-
:resent-to: AddressHeader
332-
:cc: UniqueAddressHeader
333-
:resent-cc: AddressHeader
334-
:from: UniqueAddressHeader
335-
:resent-from: AddressHeader
336-
:reply-to: UniqueAddressHeader
324+
:subject: UniqueUnstructuredHeader
325+
:date: UniqueDateHeader
326+
:resent-date: DateHeader
327+
:orig-date: UniqueDateHeader
328+
:sender: UniqueSingleAddressHeader
329+
:resent-sender: SingleAddressHeader
330+
:to: UniqueAddressHeader
331+
:resent-to: AddressHeader
332+
:cc: UniqueAddressHeader
333+
:resent-cc: AddressHeader
334+
:bcc: UniqueAddressHeader
335+
:resent-bcc: AddressHeader
336+
:from: UniqueAddressHeader
337+
:resent-from: AddressHeader
338+
:reply-to: UniqueAddressHeader
339+
:mime-version: MIMEVersionHeader
340+
:content-type: ContentTypeHeader
341+
:content-disposition: ContentDispositionHeader
342+
:content-transfer-encoding: ContentTransferEncodingHeader
343+
:message-id: MessageIDHeader
337344

338345
``HeaderRegistry`` has the following methods:
339346

Lib/email/_header_value_parser.py

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -179,37 +179,30 @@ def comments(self):
179179

180180

181181
class UnstructuredTokenList(TokenList):
182-
183182
token_type = 'unstructured'
184183

185184

186185
class Phrase(TokenList):
187-
188186
token_type = 'phrase'
189187

190188
class Word(TokenList):
191-
192189
token_type = 'word'
193190

194191

195192
class CFWSList(WhiteSpaceTokenList):
196-
197193
token_type = 'cfws'
198194

199195

200196
class Atom(TokenList):
201-
202197
token_type = 'atom'
203198

204199

205200
class Token(TokenList):
206-
207201
token_type = 'token'
208202
encode_as_ew = False
209203

210204

211205
class EncodedWord(TokenList):
212-
213206
token_type = 'encoded-word'
214207
cte = None
215208
charset = None
@@ -496,18 +489,15 @@ def domain(self):
496489

497490

498491
class DotAtom(TokenList):
499-
500492
token_type = 'dot-atom'
501493

502494

503495
class DotAtomText(TokenList):
504-
505496
token_type = 'dot-atom-text'
506497
as_ew_allowed = True
507498

508499

509500
class NoFoldLiteral(TokenList):
510-
511501
token_type = 'no-fold-literal'
512502
as_ew_allowed = False
513503

@@ -815,46 +805,40 @@ def params(self):
815805

816806

817807
class ContentType(ParameterizedHeaderValue):
818-
819808
token_type = 'content-type'
820809
as_ew_allowed = False
821810
maintype = 'text'
822811
subtype = 'plain'
823812

824813

825814
class ContentDisposition(ParameterizedHeaderValue):
826-
827815
token_type = 'content-disposition'
828816
as_ew_allowed = False
829817
content_disposition = None
830818

831819

832820
class ContentTransferEncoding(TokenList):
833-
834821
token_type = 'content-transfer-encoding'
835822
as_ew_allowed = False
836823
cte = '7bit'
837824

838825

839826
class HeaderLabel(TokenList):
840-
841827
token_type = 'header-label'
842828
as_ew_allowed = False
843829

844830

845831
class MsgID(TokenList):
846-
847832
token_type = 'msg-id'
848833
as_ew_allowed = False
834+
fold_subparts = False
849835

850836

851837
class MessageID(MsgID):
852-
853838
token_type = 'message-id'
854839

855840

856841
class Header(TokenList):
857-
858842
token_type = 'header'
859843

860844

@@ -2763,6 +2747,10 @@ def _refold_parse_tree(parse_tree, *, policy):
27632747
# to unpacking the subparts and wrapping them.
27642748
if not hasattr(part, 'encode'):
27652749
# It's not a Terminal, do each piece individually.
2750+
if not getattr(part, 'fold_subparts', True):
2751+
# This part can't be folded and also doesn't allow folding
2752+
# of subparts.
2753+
wrap_as_ew_blocked += 1
27662754
parts = list(part) + parts
27672755
else:
27682756
# It's a terminal, wrap it as an encoded word, possibly

Lib/test/test_email/test_headerregistry.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1656,5 +1656,26 @@ def test_message_id_header_is_not_folded(self):
16561656
h.fold(policy=policy.default.clone(max_line_length=20)),
16571657
'Message-ID: <somemessageidlongerthan@maxlinelength.com>\n')
16581658

1659+
# Test message-id isn't folded when id-right is no-fold-literal.
1660+
h = self.make_header(
1661+
'Message-ID',
1662+
'<somemessageidlongerthan@[127.0.0.0.0.0.0.0.0.1]>')
1663+
self.assertEqual(
1664+
h.fold(policy=policy.default.clone(max_line_length=20)),
1665+
'Message-ID: <somemessageidlongerthan@[127.0.0.0.0.0.0.0.0.1]>\n')
1666+
1667+
# Test message-id isn't folded when it contains non-ascii characters.
1668+
h = self.make_header('Message-ID', '<ईमेल@wők.com>')
1669+
self.assertEqual(
1670+
h.fold(policy=policy.default.clone(max_line_length=30)),
1671+
'Message-ID: <ईमेल@wők.com>\n')
1672+
1673+
# Test message-id is folded without breaking the msg-id token into
1674+
# encoded words, *even* if they don't fit into max_line_length.
1675+
h = self.make_header('Message-ID', '<ईमेलfromMessage@wők.com>')
1676+
self.assertEqual(
1677+
h.fold(policy=policy.default.clone(max_line_length=20)),
1678+
'Message-ID:\n <ईमेलfromMessage@wők.com>\n')
1679+
16591680
if __name__ == '__main__':
16601681
unittest.main()

0 commit comments

Comments
 (0)