Skip to content

Commit

Permalink
Update US_SSN CONTEXT and unit test (#1455)
Browse files Browse the repository at this point in the history
  • Loading branch information
claesmk authored Sep 29, 2024
1 parent c54ce2b commit 89ccadb
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ class UsSsnRecognizer(PatternRecognizer):
# "sec", # Task #603: Support keyphrases ("social sec")
"ssn",
"ssns",
- "ssn#",
- "ss#",
+ # "ssn#", # iss:1452 - a # does not work with LemmaContextAwareEnhancer
+ # "ss#", # iss:1452 - a # does not work with LemmaContextAwareEnhancer
"ssid",
]

Expand Down
12 changes: 9 additions & 3 deletions presidio-analyzer/tests/data/context_sentences_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,22 @@ IP_ADDRESS
my ip: 192.168.0.1

US_SSN
my ssn is 078-051120 07805-1120
my ssn is 078-051121

US_SSN
my social security number is 078051120

US_SSN
my social security number is 078-05-1120
my social security number is 078-05-1121

US_SSN
my social security number is 078051120
my social security number is 078051121

US_SSN
my ssns is 078-05-1121

US_SSN
my ssid is 078-05-1121

PHONE_NUMBER
my phone number is (425) 882-9090
Expand Down
6 changes: 3 additions & 3 deletions presidio-analyzer/tests/test_context_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def dataset(recognizers_map):
raise ValueError(f"bad entity type {entity_type}")

test_items.append((item, recognizer, [entity_type]))
- # Currently we have 31 sentences, this is a sanity check
- if not len(test_items) == 32:
- raise ValueError(f"expected 31 context sentences but found {len(test_items)}")
+ # Currently we have 34 sentences, this is a sanity check
+ if not len(test_items) == 34:
+ raise ValueError(f"expected 34 context sentences but found {len(test_items)}")

yield test_items

Expand Down

0 comments on commit 89ccadb

Please sign in to comment.