Skip to content

Commit

Permalink
Add support for allow_list, allow_list_match, regex_flags in REST API
Browse files Browse the repository at this point in the history
  • Loading branch information
hdw868 authored Nov 11, 2024
1 parent ce63783 commit fde30dd
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 0 deletions.
92 changes: 92 additions & 0 deletions e2e-tests/tests/test_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,3 +480,95 @@ def test_given_ad_hoc_deny_list_recognizer_the_right_entities_are_returned():
assert equal_json_strings(
expected_response, response_content, ignore_keys=["recognition_metadata"]
)


@pytest.mark.api
def test_given_allow_list_then_no_entity_is_returned():
    """An allow_list entry equal to the detected value yields no findings.

    The request relies on the default allow_list_match mode and allow-lists
    the same value that appears in the text, so the expected response is
    HTTP 200 with an empty JSON array.
    """
    payload = """
{
"text": "email: [email protected]",
"language": "en",
"allow_list": ["[email protected]"]
}
"""
    no_findings = """
[]
"""

    status_code, body = analyze(payload)

    assert status_code == 200
    assert equal_json_strings(no_findings, body)


@pytest.mark.api
def test_given_allow_list_with_regex_match_then_no_entity_is_returned():
    """A regex allow_list pattern that matches the value yields no findings.

    With "allow_list_match" set to "regex", the allow_list entry is sent as a
    pattern rather than a literal; the expected response is HTTP 200 with an
    empty JSON array.
    """
    payload = """
{
"text": "email: [email protected]",
"language": "en",
"allow_list": [".*@github.com"],
"allow_list_match": "regex"
}
"""
    no_findings = """
[]
"""

    status_code, body = analyze(payload)

    assert status_code == 200
    assert equal_json_strings(no_findings, body)


@pytest.mark.api
def test_given_allow_list_without_setting_allow_list_match_then_normal_entity_is_returned():
    """A regex-looking allow_list entry has no effect without allow_list_match.

    The request omits "allow_list_match", so the pattern-like entry is not
    expected to suppress anything and the EMAIL_ADDRESS entity is still
    reported. "recognition_metadata" is excluded from the comparison.
    """
    payload = """
{
"text": "email: [email protected]",
"language": "en",
"allow_list": [".*@github.com"]
}
"""
    email_finding = """
[
{"entity_type": "EMAIL_ADDRESS", "start": 7, "end": 23, "score": 0.85, "analysis_explanation":null}
]
"""

    status_code, body = analyze(payload)

    assert status_code == 200
    assert equal_json_strings(
        email_finding, body, ignore_keys=["recognition_metadata"]
    )


@pytest.mark.api
def test_given_regex_flags_and_normal_entities_are_returned():
    """With regex_flags set to 0 the allow_list pattern no longer suppresses.

    The request uses regex matching but passes "regex_flags": 0, and the
    EMAIL_ADDRESS entity is expected to be reported. "recognition_metadata"
    is excluded from the comparison.
    """
    # regex_flags=0 means no IGNORECASE flag, so the allow-list pattern is
    # matched case-sensitively: "GitHub" != "github".
    payload = """
{
"text": "email: [email protected]",
"language": "en",
"allow_list": [".*@github.com"],
"allow_list_match": "regex",
"regex_flags": 0
}
"""
    email_finding = """
[
{"entity_type": "EMAIL_ADDRESS", "start": 7, "end": 23, "score": 0.85, "analysis_explanation":null}
]
"""

    status_code, body = analyze(payload)

    assert status_code == 200
    assert equal_json_strings(
        email_finding, body, ignore_keys=["recognition_metadata"]
    )
3 changes: 3 additions & 0 deletions presidio-analyzer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def analyze() -> Tuple[str, int]:
return_decision_process=req_data.return_decision_process,
ad_hoc_recognizers=req_data.ad_hoc_recognizers,
context=req_data.context,
allow_list=req_data.allow_list,
allow_list_match=req_data.allow_list_match,
regex_flags=req_data.regex_flags
)

return Response(
Expand Down
5 changes: 5 additions & 0 deletions presidio-analyzer/presidio_analyzer/analyzer_request.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from typing import Dict

from presidio_analyzer import PatternRecognizer
Expand Down Expand Up @@ -34,3 +35,7 @@ def __init__(self, req_data: Dict):
PatternRecognizer.from_dict(rec) for rec in ad_hoc_recognizers
]
self.context = req_data.get("context")
self.allow_list = req_data.get("allow_list")
self.allow_list_match = req_data.get("allow_list_match", "exact")
self.regex_flags = req_data.get("regex_flags",
re.DOTALL | re.MULTILINE | re.IGNORECASE)

0 comments on commit fde30dd

Please sign in to comment.