From fc874240994a1d2a0a1e1798053dfc9f192e1b15 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Mon, 29 Dec 2025 13:05:39 -0500 Subject: [PATCH] feat(deduplication): ignore periods when comparing names This is a small extension to our deduplication logic so that periods in names will be ignored when comparing for deduplication. For example, a query for `3929 St Marks Avenue, Niagara Falls, ON, Canada` returns two duplicate addresses from OpenAddresses. One is sourced from a countrywide dataset, and the other from a regional dataset. One has a period after the abbreviation for Saint, the other doesn't. We should probably evaluate ignoring most or all punctuation, but this fixes a somewhat common case for now. --- helper/diffPlaces.js | 2 +- test/unit/helper/diffPlaces.js | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/helper/diffPlaces.js b/helper/diffPlaces.js index c9c7ea5ab..0a3c195b7 100644 --- a/helper/diffPlaces.js +++ b/helper/diffPlaces.js @@ -386,7 +386,7 @@ function layerDependentNormalization(names, layer) { * lowercase characters and remove diacritics and some punctuation */ function normalizeString(str){ - return removeAccents(unicode.normalize(str)).toLowerCase().split(/[ ,-]+/).join(' '); + return removeAccents(unicode.normalize(str)).toLowerCase().split(/[ ,-.]+/).join(' '); } module.exports.isDifferent = isDifferent; diff --git a/test/unit/helper/diffPlaces.js b/test/unit/helper/diffPlaces.js index 4e095440c..42bc673ec 100644 --- a/test/unit/helper/diffPlaces.js +++ b/test/unit/helper/diffPlaces.js @@ -737,6 +737,7 @@ module.exports.tests.normalizeString = function (test, common) { t.equal(normalizeString('foo, bar'), 'foo bar'); t.equal(normalizeString('foo-bar'), 'foo bar'); t.equal(normalizeString('foo , - , - bar'), 'foo bar'); + t.equal(normalizeString('St. Marks'), 'st marks'); t.end(); });