Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion nameparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,12 @@ def join_on_conjunctions(self, pieces: list[str], additional_parts_count: int =
try:
# if there are no more prefixes, look for a suffix to stop at
stop_at = next(iter(filter(self.is_suffix, pieces[i + 1:])))
j = pieces.index(stop_at)
# search from i + 1: filter() finds the value of stop_at
# in pieces[i+1:] but pieces.index() without a start
# argument searches from 0, so an earlier occurrence of
# the same token (e.g. a suffix token that also appears
# before the prefix) would be matched instead.
j = pieces.index(stop_at, i + 1)
new_piece = ' '.join(pieces[i:j])
pieces = pieces[:i] + [new_piece] + pieces[j:]
except StopIteration:
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ dev = [
"dill (>=0.2.5)",
"sphinx (>=8)",
"mypy (>=2.1)",
"ruff (>=0.15)"
"ruff (>=0.15)",
"pytest-timeout>=2.4.0",
]

[tool.mypy]
Expand Down
46 changes: 46 additions & 0 deletions tests/test_prefixes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from nameparser import HumanName

from tests.base import HumanNameTestBase
Expand Down Expand Up @@ -43,6 +45,50 @@ def test_prefix_before_two_part_last_name_with_acronym_suffix(self) -> None:
self.m(hn.last, "von bergen wessels", hn)
self.m(hn.suffix, "M.D.", hn)

def test_title_before_and_after_prefixed_last_name(self) -> None:
    # Regression test for issue #100. When the same title/suffix token
    # ("dr") appears both before AND after a prefixed last name, the
    # suffix-boundary lookup used to match the LEADING "dr" instead of the
    # trailing one, corrupting the middle name into " dr Vincent van".
    parsed = HumanName("dr Vincent van Gogh dr")
    expected_fields = (
        ("title", "dr"),
        ("first", "Vincent"),
        ("middle", ""),
        ("last", "van Gogh"),
        ("suffix", "dr"),
    )
    # Fields are checked in the order listed above.
    for field, expected in expected_fields:
        self.m(getattr(parsed, field), expected, parsed)

def test_suffix_token_collision_with_two_word_prefix(self) -> None:
    # Same scenario as issue #100, but with a two-word prefix ("van der"),
    # which drives the prefix-joining loop through a different number of
    # iterations.
    name = HumanName("dr Vincent van der Gogh dr")
    # dicts preserve insertion order, so the checks run title -> suffix.
    expectations = {
        "title": "dr",
        "first": "Vincent",
        "middle": "",
        "last": "van der Gogh",
        "suffix": "dr",
    }
    for attribute, wanted in expectations.items():
        self.m(getattr(name, attribute), wanted, name)

def test_title_before_and_after_prefixed_last_name_with_middle(self) -> None:
    # The pre-fix bug corrupted the middle field, so make sure a genuine
    # middle name survives untouched when the repeated "dr" token is
    # present on both sides of the prefixed last name.
    parsed_name = HumanName("dr Vincent James van Gogh dr")
    field_names = ("title", "first", "middle", "last", "suffix")
    expected_values = ("dr", "Vincent", "James", "van Gogh", "dr")
    for field_name, expected_value in zip(field_names, expected_values):
        self.m(getattr(parsed_name, field_name), expected_value, parsed_name)

@pytest.mark.timeout(2)
def test_many_repeated_prefixes_does_not_blow_up(self) -> None:
    # Regression test for issue #108: a long run of repeated prefixes used
    # to grow the pieces list exponentially and exhaust memory. The
    # 2-second timeout applies both locally and in CI; a hang here means an
    # exponential regression has been reintroduced.
    prefix_run = "van der " * 30
    raw_name = "".join(("Jan ", prefix_run, "Berg"))
    parsed = HumanName(raw_name)
    self.assertFalse(parsed.unparsable)
    self.m(parsed.first, "Jan", parsed)
    self.assertIn("Berg", parsed.last)

def test_two_part_last_name_with_suffix_comma(self) -> None:
hn = HumanName("pennie von bergen wessels, III")
self.m(hn.first, "pennie", hn)
Expand Down
118 changes: 66 additions & 52 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.