From 177cc487d619beb00913cf92477d59cfa60e35fe Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 02:46:36 -0700 Subject: [PATCH 1/2] feat: add international honorifics to titles (#99) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds German/Dutch/Scandinavian (herr, frau, hr, dhr, mevr, mevrouw, fru, frøken, vrouwe, etc.), French (monsieur, monseigneur), Portuguese (senhor, senhora, senhorita), Spanish (señor, señora, señorita, sres, srtas, etc.), and Italian (signor, signora, signorina, etc.) honorifics. Omits 'senhoro' (not a real Portuguese word) and 'frou' (unrecognized title). Skips 'fr' and 'mx' which were already present. Co-Authored-By: Claude Sonnet 4.6 --- nameparser/config/titles.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/nameparser/config/titles.py b/nameparser/config/titles.py index 1467a53..5845248 100644 --- a/nameparser/config/titles.py +++ b/nameparser/config/titles.py @@ -230,6 +230,7 @@ 'designer', 'detective', 'developer', + 'dhr', 'diplomat', 'dir', 'director', @@ -289,7 +290,11 @@ 'forester', 'founder', 'fr', + 'frau', 'friar', + 'frk', + 'frøken', + 'fru', 'gaf', 'gen', 'general', @@ -312,6 +317,10 @@ 'heiress', 'her', 'hereditary', + 'heren', + 'herr', + 'herren', + 'herrn', 'high', 'highness', 'his', @@ -323,6 +332,7 @@ 'honorable', 'honourable', 'host', + 'hr', 'illustrator', 'imam', 'industrialist', @@ -398,6 +408,9 @@ 'merchant', 'met', 'metropolitan', + 'mevr', + 'mevrouw', + 'mevrouwe', 'mg', 'mgr', 'mgysgt', @@ -412,6 +425,8 @@ 'mobster', 'model', 'monk', + 'monseigneur', + 'monsieur', 'monsignor', 'most', 'mountaineer', @@ -538,7 +553,15 @@ 'security', 'seigneur', 'senator', + 'senhor', + 'senhora', + 'senhorita', 'senior', + 'señor', + 'señora', + 'señores', + 'señorita', + 'señoritas', 'senior-judge', 'sergeant', 'servant', @@ -551,6 +574,10 @@ 'sheikh', 'sheriff', 'siddha', + 'signor', + 'signora', + 'signore', + 'signorina', 'singer', 'singer-songwriter', 'sma', @@ -569,6 +596,8 @@ 'sr', 'sra', 'srta', + 'srtas', + 'sres', 'ssg', 'ssgt', 'st', @@ -619,6 +648,7 @@ 'vizier', 'vocalist', 'voice', + 'vrouwe', 'warden', 'warrant', 'wing', From 45ceb802e5d6c99cc055699fd146719d2c8ad051 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 03:01:47 -0700 Subject: [PATCH 2/2] tests: add non-ASCII title normalization and variation tests Co-Authored-By: Claude Sonnet 4.6 --- tests/test_titles.py | 33 +++++++++++++++++++++++++++++++++ tests/test_variations.py | 7 +++++++ 2 files changed, 40 insertions(+) diff --git a/tests/test_titles.py b/tests/test_titles.py index 7b06121..5bad578 100644 --- a/tests/test_titles.py +++ b/tests/test_titles.py @@ -239,3 +239,36 @@ def test_2_same_prefixes_in_the_name(self) -> None: self.m(hh.first, "Vincent", hh) self.m(hh.middle, "van Gogh", hh) self.m(hh.last, "van Beethoven", hh) + + # Non-ASCII title normalization — confirm diacritic titles survive + # the lowercase lookup path end-to-end. + + def test_señora_non_ascii_title(self) -> None: + hn = HumanName("Señora María García") + self.m(hn.title, "Señora", hn) + self.m(hn.first, "María", hn) + self.m(hn.last, "García", hn) + + def test_señora_lowercase_non_ascii_title(self) -> None: + hn = HumanName("señora María García") + self.m(hn.title, "señora", hn) + self.m(hn.first, "María", hn) + self.m(hn.last, "García", hn) + + def test_frøken_non_ascii_title(self) -> None: + hn = HumanName("Frøken Jensen") + self.m(hn.title, "Frøken", hn) + self.m(hn.first, "", hn) + self.m(hn.last, "Jensen", hn) + + def test_herr_title_not_first_name(self) -> None: + hn = HumanName("Herr Schmidt") + self.m(hn.title, "Herr", hn) + self.m(hn.first, "", hn) + self.m(hn.last, "Schmidt", hn) + + def test_herr_title_with_first_name(self) -> None: + hn = HumanName("Herr Klaus Schmidt") + self.m(hn.title, "Herr", hn) + self.m(hn.first, "Klaus", hn) + self.m(hn.last, "Schmidt", hn) diff --git a/tests/test_variations.py b/tests/test_variations.py index 6edccee..d574573 100644 --- a/tests/test_variations.py +++ b/tests/test_variations.py @@ -178,6 +178,13 @@ "U.S. District Judge Marc Thomas Treadwell", "Dra. Andréia da Silva", "Srta. Andréia da Silva", + "Herr Klaus Schmidt", + "Frau Anna Müller", + "Monsieur Jean Dupont", + "Señor Carlos García", + "Señora María García", + "Signor Marco Rossi", + "Mevrouw Anna de Vries", )