Skip to content

Commit

Permalink
Bug/azure ai language context (#1458)
Browse files Browse the repository at this point in the history
  • Loading branch information
omri374 authored Oct 2, 2024
1 parent 89ccadb commit b9f6cba
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(
ta_client: Optional["TextAnalyticsClient"] = None,
azure_ai_key: Optional[str] = None,
azure_ai_endpoint: Optional[str] = None,
**kwargs
):
"""
Wrap the PII detection in Azure AI Language.
Expand All @@ -36,6 +37,7 @@ def __init__(
the client will be created using the key and endpoint.
:param azure_ai_key: Azure AI for language key
:param azure_ai_endpoint: Azure AI for language endpoint
:param kwargs: Additional arguments required by the parent class
For more info, see https://learn.microsoft.com/en-us/azure/ai-services/language-service/personally-identifiable-information/overview
""" # noqa E501
Expand All @@ -45,6 +47,7 @@ def __init__(
supported_language=supported_language,
name="Azure AI Language PII",
version="5.2.0",
**kwargs
)

is_available = bool(TextAnalyticsClient)
Expand Down
18 changes: 18 additions & 0 deletions presidio-analyzer/tests/conf/test_azure_ai_language_reco.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Analyzer engine configuration for the Azure AI Language recognizer test
# (tests/conf/test_azure_ai_language_reco.yaml). It registers a single
# recognizer, limits the engine to English, and uses a spaCy NLP engine.
recognizer_registry:
# 26 == 2 | 8 | 16; assuming these are Python `re` flag bits, that is
# re.IGNORECASE | re.MULTILINE | re.DOTALL -- TODO confirm against the loader.
global_regex_flags: 26
recognizers:
# The name matches the mock recognizer class declared inside the test that
# loads this file.
- name: MockAzureAiLanguageRecognizer
type: predefined
ta_client: "test" # This is a placeholder for testing purposes


supported_languages:
- en
default_score_threshold: 0.7

nlp_configuration:
nlp_engine_name: spacy
models:
-
lang_code: en
model_name: en_core_web_lg
44 changes: 37 additions & 7 deletions presidio-analyzer/tests/test_analyzer_engine_provider.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import re
from pathlib import Path
from typing import List

from presidio_analyzer import AnalyzerEngineProvider
from presidio_analyzer.nlp_engine import SpacyNlpEngine
from presidio_analyzer import AnalyzerEngineProvider, RecognizerResult
from presidio_analyzer.nlp_engine import SpacyNlpEngine, NlpArtifacts

from presidio_analyzer.nlp_engine.transformers_nlp_engine import TransformersNlpEngine
from presidio_analyzer.predefined_recognizers import AzureAILanguageRecognizer


def get_full_paths(analyzer_yaml, nlp_engine_yaml=None, recognizer_registry_yaml=None):
this_path = Path(__file__).parent.absolute()
Expand Down Expand Up @@ -83,7 +86,9 @@ def test_analyzer_engine_provider_configuration_file():
assert engine.nlp_engine.engine_name == "spacy"


def test_analyzer_engine_provider_configuration_file_missing_values_expect_defaults(mandatory_recognizers):
def test_analyzer_engine_provider_configuration_file_missing_values_expect_defaults(
mandatory_recognizers,
):
test_yaml, _, _ = get_full_paths("conf/test_analyzer_engine_missing_values.yaml")
provider = AnalyzerEngineProvider(test_yaml)
engine = provider.create_engine()
Expand Down Expand Up @@ -133,10 +138,6 @@ def test_analyzer_engine_provider_with_files_per_provider():
recognizer_registry_conf_file=recognizer_registry_yaml,
)

provider = AnalyzerEngineProvider(analyzer_engine_conf_file=analyzer_yaml,
nlp_engine_conf_file=nlp_engine_yaml,
recognizer_registry_conf_file=recognizer_registry_yaml)

analyzer_engine = provider.create_engine()

# assert analyzer instance is correct
Expand All @@ -153,3 +154,32 @@ def test_analyzer_engine_provider_with_files_per_provider():
recognizer_registry = analyzer_engine.registry
assert len(recognizer_registry.recognizers) == 6
assert recognizer_registry.supported_languages == ["en", "es"]


def test_analyzer_engine_provider_with_azure_ai_language():
    """Check that a YAML-configured Azure AI Language recognizer is loaded.

    Builds an engine from ``conf/test_azure_ai_language_reco.yaml`` and
    asserts that exactly one recognizer named "Azure AI Language PII" ends
    up in the registry, and that analyzing a short English sentence yields
    at least one result.
    """

    class MockAzureAiLanguageRecognizer(AzureAILanguageRecognizer):
        """Stub recognizer whose ``analyze`` returns one fixed PERSON result.

        NOTE(review): the YAML config refers to this class by name;
        presumably the registry loader resolves it through subclass
        lookup -- confirm against the loader.
        """

        def analyze(
            self,
            text: str,
            entities: List[str] = None,
            nlp_artifacts: NlpArtifacts = None,
        ) -> List[RecognizerResult]:
            # Same canned result regardless of the input text.
            canned = RecognizerResult(
                entity_type="PERSON", start=0, end=4, score=0.9
            )
            return [canned]

    config_path, _, _ = get_full_paths("conf/test_azure_ai_language_reco.yaml")
    engine = AnalyzerEngineProvider(
        analyzer_engine_conf_file=config_path
    ).create_engine()

    # Keep only the recognizers that report the Azure AI Language name.
    matching = list(
        filter(
            lambda candidate: candidate.name == "Azure AI Language PII",
            engine.registry.recognizers,
        )
    )
    assert len(matching) == 1

    findings = engine.analyze("This is a test", language="en")
    assert len(findings) > 0

0 comments on commit b9f6cba

Please sign in to comment.