Skip to content

Commit

Permalink
Addition of leniency parameter in predefined PhoneRecognizer (#1311)
Browse files Browse the repository at this point in the history
  • Loading branch information
VMD7 authored Feb 24, 2024
1 parent 173b527 commit 4c48b92
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ class PhoneRecognizer(LocalRecognizer):
:param context: Base context words for enhancing the assurance scores.
:param supported_language: Language this recognizer supports
:param supported_regions: The regions for phone number matching and validation
:param leniency: The strictness level of phone number formats.
Accepts values from 0 to 3, where 0 is the most lenient and 3 is the strictest.
"""

SCORE = 0.4
Expand All @@ -30,9 +32,11 @@ def __init__(
supported_language: str = "en",
# For all regions, use phonenumbers.SUPPORTED_REGIONS
supported_regions=DEFAULT_SUPPORTED_REGIONS,
leniency: Optional[int] = 1,
):
context = context if context else self.CONTEXT
self.supported_regions = supported_regions
self.leniency = leniency
super().__init__(
supported_entities=self.get_supported_entities(),
supported_language=supported_language,
Expand All @@ -59,7 +63,8 @@ def analyze(
"""
results = []
for region in self.supported_regions:
for match in phonenumbers.PhoneNumberMatcher(text, region, leniency=1):
for match in phonenumbers.PhoneNumberMatcher(text, region,
leniency=self.leniency):
results += [
self._get_recognizer_result(match, text, region, nlp_artifacts)
]
Expand Down
41 changes: 41 additions & 0 deletions presidio-analyzer/tests/test_phone_recognizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,44 @@ def test_when_all_phones_then_succeed(
assert len(results) == expected_len
for i, (res, (st_pos, fn_pos)) in enumerate(zip(results, expected_positions)):
assert_result(res, entities[i], st_pos, fn_pos, score)


@pytest.mark.parametrize(
    "text, expected_len, entities, expected_positions, score, leniency",
    [
        # fmt: off
        # Each text is exercised at two leniency values; the expected number
        # of matches differs between them.

        # Second number glued to the preceding word (no separating space).
        ("My US number is (415) 555-0132, and my international one is415-555-0132",
         1, ["PHONE_NUMBER"], ((16, 30),), 0.4, 1),
        ("My US number is (415) 555-0132, and my international one is415-555-0132",
         2, ["PHONE_NUMBER", "PHONE_NUMBER"], ((16, 30), (59, 71)), 0.4, 0),

        # Second number written as 91-415-555-0132.
        ("My US number is (415) 555-0132, and my international one is 91-415-555-0132",
         1, ["PHONE_NUMBER"], ((16, 30),), 0.4, 2),
        ("My US number is (415) 555-0132, and my international one is 91-415-555-0132",
         2, ["PHONE_NUMBER", "PHONE_NUMBER"], ((16, 30), (60, 75)), 0.4, 1),

        # Second number written as +91 4155 550132.
        ("My US number is (415) 555-0132, and my international one is +91 4155 550132",
         1, ["PHONE_NUMBER"], ((16, 30),), 0.4, 3),
        ("My US number is (415) 555-0132, and my international one is +91 4155 550132",
         2, ["PHONE_NUMBER", "PHONE_NUMBER"], ((16, 30), (60, 75)), 0.4, 2),

        # Second number written as +91 4155550132, checked at leniency 3 only.
        ("My US number is (415) 555-0132, and my international one is +91 4155550132",
         2, ["PHONE_NUMBER", "PHONE_NUMBER"], ((16, 30), (60, 74)), 0.4, 3),
        # fmt: on
    ],
)
def test_when_phone_with_leniency_then_succeed(
    spacy_nlp_engine,
    text,
    expected_len,
    entities,
    expected_positions,
    score,
    leniency,
):
    """Check the matches of a PhoneRecognizer built with a given leniency.

    A recognizer is constructed with ``leniency`` and run over ``text``; the
    test asserts the number of results and then checks each result against
    its expected entity, start/end offsets and score via ``assert_result``.
    """
    artifacts = spacy_nlp_engine.process_text(text, "en")
    found = PhoneRecognizer(leniency=leniency).analyze(
        text, entities, nlp_artifacts=artifacts
    )

    assert len(found) == expected_len

    for idx, (result, span) in enumerate(zip(found, expected_positions)):
        start, end = span
        assert_result(result, entities[idx], start, end, score)

0 comments on commit 4c48b92

Please sign in to comment.