# Patch summary (reviewer notes; this text precedes the first "diff --git"
# header and is not part of any hunk).
#
# nameparser/config/titles.py: adds lowercase non-English honorific strings to
#   the titles collection, e.g. 'dhr', 'frau', 'frøken', 'herr', 'mevrouw',
#   'monsieur', 'senhor', 'señora', 'signora', 'vrouwe'.
# tests/test_titles.py: adds five HumanName parsing tests covering titles with
#   diacritics ("Señora", "señora", "Frøken") and "Herr" with and without a
#   first name.
# tests/test_variations.py: adds seven title-prefixed sample name strings to an
#   existing collection.
#
# NOTE(review): the neighbouring entries look alphabetically ordered, but
#   'srtas' is inserted before 'sres' and the 'señor*' entries land between
#   'senior' and 'senior-judge' -- confirm whether ordering matters here.
# NOTE(review): three new test names contain non-ASCII characters (ñ, ø);
#   valid Python 3 identifiers, but confirm the project's linters accept them.
# NOTE(review): the last hunk header declares 6 old / 13 new lines while only
#   4 / 11 are visible in this copy -- presumably trailing context was lost;
#   verify before applying.
diff --git a/nameparser/config/titles.py b/nameparser/config/titles.py index 1467a53..5845248 100644 --- a/nameparser/config/titles.py +++ b/nameparser/config/titles.py @@ -230,6 +230,7 @@ 'designer', 'detective', 'developer', + 'dhr', 'diplomat', 'dir', 'director', @@ -289,7 +290,11 @@ 'forester', 'founder', 'fr', + 'frau', 'friar', + 'frk', + 'frøken', + 'fru', 'gaf', 'gen', 'general', @@ -312,6 +317,10 @@ 'heiress', 'her', 'hereditary', + 'heren', + 'herr', + 'herren', + 'herrn', 'high', 'highness', 'his', @@ -323,6 +332,7 @@ 'honorable', 'honourable', 'host', + 'hr', 'illustrator', 'imam', 'industrialist', @@ -398,6 +408,9 @@ 'merchant', 'met', 'metropolitan', + 'mevr', + 'mevrouw', + 'mevrouwe', 'mg', 'mgr', 'mgysgt', @@ -412,6 +425,8 @@ 'mobster', 'model', 'monk', + 'monseigneur', + 'monsieur', 'monsignor', 'most', 'mountaineer', @@ -538,7 +553,15 @@ 'security', 'seigneur', 'senator', + 'senhor', + 'senhora', + 'senhorita', 'senior', + 'señor', + 'señora', + 'señores', + 'señorita', + 'señoritas', 'senior-judge', 'sergeant', 'servant', @@ -551,6 +574,10 @@ 'sheikh', 'sheriff', 'siddha', + 'signor', + 'signora', + 'signore', + 'signorina', 'singer', 'singer-songwriter', 'sma', @@ -569,6 +596,8 @@ 'sr', 'sra', 'srta', + 'srtas', + 'sres', 'ssg', 'ssgt', 'st', @@ -619,6 +648,7 @@ 'vizier', 'vocalist', 'voice', + 'vrouwe', 'warden', 'warrant', 'wing', diff --git a/tests/test_titles.py b/tests/test_titles.py index 7b06121..5bad578 100644 --- a/tests/test_titles.py +++ b/tests/test_titles.py @@ -239,3 +239,36 @@ def test_2_same_prefixes_in_the_name(self) -> None: self.m(hh.first, "Vincent", hh) self.m(hh.middle, "van Gogh", hh) self.m(hh.last, "van Beethoven", hh) + + # Non-ASCII title normalization — confirm diacritic titles survive + # the lowercase lookup path end-to-end. 
+ + def test_señora_non_ascii_title(self) -> None: + hn = HumanName("Señora María García") + self.m(hn.title, "Señora", hn) + self.m(hn.first, "María", hn) + self.m(hn.last, "García", hn) + + def test_señora_lowercase_non_ascii_title(self) -> None: + hn = HumanName("señora María García") + self.m(hn.title, "señora", hn) + self.m(hn.first, "María", hn) + self.m(hn.last, "García", hn) + + def test_frøken_non_ascii_title(self) -> None: + hn = HumanName("Frøken Jensen") + self.m(hn.title, "Frøken", hn) + self.m(hn.first, "", hn) + self.m(hn.last, "Jensen", hn) + + def test_herr_title_not_first_name(self) -> None: + hn = HumanName("Herr Schmidt") + self.m(hn.title, "Herr", hn) + self.m(hn.first, "", hn) + self.m(hn.last, "Schmidt", hn) + + def test_herr_title_with_first_name(self) -> None: + hn = HumanName("Herr Klaus Schmidt") + self.m(hn.title, "Herr", hn) + self.m(hn.first, "Klaus", hn) + self.m(hn.last, "Schmidt", hn) diff --git a/tests/test_variations.py b/tests/test_variations.py index 6edccee..d574573 100644 --- a/tests/test_variations.py +++ b/tests/test_variations.py @@ -178,6 +178,13 @@ "U.S. District Judge Marc Thomas Treadwell", "Dra. Andréia da Silva", "Srta. Andréia da Silva", + "Herr Klaus Schmidt", + "Frau Anna Müller", + "Monsieur Jean Dupont", + "Señor Carlos García", + "Señora María García", + "Signor Marco Rossi", + "Mevrouw Anna de Vries", )