File tree Expand file tree Collapse file tree 3 files changed +27
-0
lines changed Expand file tree Collapse file tree 3 files changed +27
-0
lines changed Original file line number Diff line number Diff line change @@ -33,6 +33,8 @@ History
3333 become ``gmail.com``.
3434* Additional ``gmail.com`` typos are now normalized when ``hash_email`` is
3535 used. For example, ``gmali.com`` will become ``gmail.com``.
36+ * When ``hash_email`` is used, the local part of an email address is now
37+ normalized to NFC.
3638
37392.9.0 (2023-12-05)
3840++++++++++++++++++
Original file line number Diff line number Diff line change 88import re
99import warnings
1010import hashlib
11+ import unicodedata
1112from typing import Any , Dict
1213from voluptuous import MultipleInvalid
1314
@@ -364,6 +365,8 @@ def _clean_email(address):
364365 domain = _clean_domain (address [at_idx + 1 :]) # noqa
365366 local_part = address [:at_idx ]
366367
368+ local_part = unicodedata .normalize ("NFC" , local_part )
369+
367370 # Strip off aliased part of email address.
368371 if domain in _YAHOO_DOMAINS :
369372 divider = "-"
Original file line number Diff line number Diff line change @@ -141,6 +141,26 @@ def test_maybe_hash_email(self):
141141 }
142142 },
143143 },
144+ {
145+ "name" : "email local part nfc normalization form 1" ,
146+ "input" : {"email" : {"address" : "bu\u0308 cher@example.com" }},
147+ "expected" : {
148+ "email" : {
149+ "address" : "53550c712b146287a2d0dd30e5ed6f4b" ,
150+ "domain" : "example.com" ,
151+ }
152+ },
153+ },
154+ {
155+ "name" : "email local part nfc normalization form 2" ,
156+ "input" : {"email" : {"address" : "b\u00FC cher@example.com" }},
157+ "expected" : {
158+ "email" : {
159+ "address" : "53550c712b146287a2d0dd30e5ed6f4b" ,
160+ "domain" : "example.com" ,
161+ }
162+ },
163+ },
144164 ]
145165
146166 for test in tests :
@@ -231,6 +251,8 @@ def test_clean_email():
231251 {"input" : "foo@example.comcom" , "output" : "foo@example.com" },
232252 {"input" : "foo@example.com." , "output" : "foo@example.com" },
233253 {"input" : "foo@example.com..." , "output" : "foo@example.com" },
254+ {"input" : "example@bu\u0308 cher.com" , "output" : "example@xn--bcher-kva.com" },
255+ {"input" : "example@b\u00FC cher.com" , "output" : "example@xn--bcher-kva.com" },
234256 ]
235257
236258 for test in tests :
You can’t perform that action at this time.
0 commit comments