Skip to content

Commit

Permalink
Avoid disambiguation from street names and city names in fr_dept_name…
Browse files Browse the repository at this point in the history
…_to_dept_code
  • Loading branch information
Roxane committed Jan 8, 2025
1 parent aab94a4 commit 25712e5
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)

### Fixed

- Avoid false matches on street names and city names in `fr_dept_name_to_dept_code`

## [5.3.1] - 2024-11-06

### Added
Expand Down
27 changes: 27 additions & 0 deletions geoconvert/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,11 +296,38 @@ def fr_postcode_to_dept_code(text):
# Backward-compatibility alias: the legacy name `address_to_zipcode` is kept
# as a second binding for `fr_postcode_to_dept_code`.
address_to_zipcode = fr_postcode_to_dept_code

# Avoid "rue de Paris" situations: a place name that follows a street-type
# keyword belongs to the street, not to a departement.
# Alternation of the street-type keywords, used to build the regex below.
fr_street_names_re = "|".join(
    (
        "boulevard",
        "avenue",
        "chemin",
        "rue",
        "route",
        "impasse",
        "place",
        "passage",
        "ruelle",
        "quai",
        "all.e",  # "." accepts both the accented and the plain spelling
    )
)
# Matches a street keyword (optionally plural) and up to 20 following
# characters, stopping at the first digit, "(", ",", newline or "-".
# The keyword must be a whole word: without the "\b" anchors, words that
# merely contain a keyword ("crues", "emplacement") would be matched too and
# the text after them, possibly a real departement name, would be stripped.
fr_street_name_cleaning_re = re.compile(
    rf"\b({fr_street_names_re})s?\b[^\d\(,\n-]{{,20}}", flags=re.I
)
# Avoid "Ville-sur-Loire" situations: matches "<word> sous|sur <word>" (with
# any single separator character around the preposition) so the river name in
# a town name is not read as a departement.
# NOTE: this also matches ordinary phrases such as "savoir sur Saint".
fr_town_name_cleaning_re = re.compile(r"\w+.\b(sous|sur)\b.(\w+)", flags=re.I)


def fr_dept_name_to_dept_code(text):
"""
Return the departement number from the departement name
"""
# Avoid "rue de Paris" situations
text = fr_street_name_cleaning_re.sub("", text)
# Avoid "Ville-sur-Loire" situations
text = fr_town_name_cleaning_re.sub("", text)

# There is no space in french dept names, but hyphens instead.
text = safe_string(text).replace(" ", "-")

Expand Down
17 changes: 11 additions & 6 deletions tests/test_subdivisions/test_france.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,12 +228,17 @@ def test_fr_postcode_to_dept_code(self, input_data, expected):
("Loire Atlanti)que", "44"),
("Yonne", "89"),
("Saint Pierre et Miquelon", "975"),
("Tout savoir sur Saint Barthélemy", "977"),
("Tout savoir sur saint-barthelemy", "977"),
("Tout savoir sur saint Barthélémy", "977"),
# There may be some mistakes, so be careful what is passed
("Rue de la Réunion, 75000 Paris", "974"),
("Rue de l'Orne, 44800 Saint-Herblain", "61"),
("Tout savoir à propos de Saint Barthélemy", "977"),
("Tout savoir à propos de saint-barthelemy", "977"),
("Tout savoir à propos de saint Barthélémy", "977"),
# Avoid ambiguous matches
# due to street names
("Rue de la Réunion 61000 Alencon", None), # "réunion" could mean 974
("Rue de Paris, Nantes", None), # "paris" could mean 75
("Rue de l'Orne, 44800 Saint-Herblain", None), # "Orne" could be 61
# due to city names
("Sully sur Loire (Loiret)", "45"), # "Loire" could mean 42
("Gournay-sous-Marne (Seine saint Denis)", "93"), # "Marne" could be 51
],
)
def test_fr_dept_name_dept_code(self, input_data, expected):
Expand Down

0 comments on commit 25712e5

Please sign in to comment.