Skip to content

Commit

Permalink
Reduce memory usage of Analyzer test suite (#1429)
Browse files: browse the repository at this point in the history
  • Loading branch information
hhobson authored Aug 28, 2024
1 parent 6c51464 commit 9696b9e
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 54 deletions.
50 changes: 16 additions & 34 deletions presidio-analyzer/tests/test_analyzer_engine.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import copy
from abc import ABC
from contextlib import nullcontext
from typing import List, Optional
import re

Expand Down Expand Up @@ -58,11 +59,6 @@ def unit_test_guid():
return "00000000-0000-0000-0000-000000000000"


@pytest.fixture(scope="module")
def nlp_engine(nlp_engines):
return nlp_engines["spacy_en"]


def test_simple():
dic = {
"text": "John Smith drivers license is AC432223",
Expand Down Expand Up @@ -91,39 +87,25 @@ def test_when_analyze_with_predefined_recognizers_then_return_results(
assert len(results) == 1
assert_result(results[0], "CREDIT_CARD", 14, 33, max_score)

def test_when_analyze_with_unsupported_language_must_match(
):
with pytest.raises(ValueError):
registry = RecognizerRegistryProvider(registry_configuration={"supported_languages": ["en"]}).create_recognizer_registry()
AnalyzerEngine(
registry=registry,
supported_languages=["es", "de"]
)

with pytest.raises(ValueError):
registry = RecognizerRegistryProvider().create_recognizer_registry()
@pytest.mark.parametrize(
"registry_config,analyzer_lang,expectation",
[
({"supported_languages": ["en"]}, ["es", "de"], pytest.raises(ValueError)),
(None, ["es", "de"], pytest.raises(ValueError)),
({"supported_languages": ["es", "de"]}, None, pytest.raises(ValueError)),
({"supported_languages": ["es", "de"]}, ["de", "es"], nullcontext()),
(None, None, nullcontext()),
]
)
def test_when_analyze_with_unsupported_language_must_match(registry_config, analyzer_lang, expectation):
with expectation:
registry = RecognizerRegistryProvider(registry_configuration=registry_config).create_recognizer_registry()
AnalyzerEngine(
registry=registry,
supported_languages=["es", "de"]
)

with pytest.raises(ValueError):
registry = RecognizerRegistryProvider(registry_configuration={"supported_languages": ["es", "de"]}).create_recognizer_registry()
AnalyzerEngine(
registry=registry
supported_languages=analyzer_lang,
nlp_engine=NlpEngineMock(),
)

registry = RecognizerRegistryProvider(registry_configuration={"supported_languages": ["es", "de"]}).create_recognizer_registry()
AnalyzerEngine(
registry=registry,
supported_languages=["de", "es"]
)

registry = RecognizerRegistryProvider().create_recognizer_registry()
AnalyzerEngine(
registry=registry
)

def test_when_analyze_with_defaults_success(
):
registry = RecognizerRegistryProvider().create_recognizer_registry()
Expand Down
11 changes: 0 additions & 11 deletions presidio-analyzer/tests/test_nlp_engine_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,6 @@ def nlp_configuration_dict() -> Dict:
return nlp_configuration


@pytest.fixture(scope="session")
def ner_model_configuration_dict() -> Dict:
ner_model_configuration = {
"nlp_engine_name": "transformers",
"aggregation_strategy": "simple",
"alignment_mode": "strict",
"low_score_entity_names": ["O"],
}
return ner_model_configuration


def test_when_create_nlp_engine__then_return_default_configuration():
provider = NlpEngineProvider()
engine = provider.create_engine()
Expand Down
5 changes: 0 additions & 5 deletions presidio-analyzer/tests/test_recognizer_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@
from presidio_analyzer.predefined_recognizers import SpacyRecognizer


@pytest.fixture(scope="module")
def request_id():
return "UT"


def create_mock_pattern_recognizer(lang, entity, name):
return PatternRecognizer(
supported_entity=entity,
Expand Down
5 changes: 2 additions & 3 deletions presidio-analyzer/tests/test_stanza_recognizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@ def nlp_recognizer(nlp_recognizers):
return nlp_recognizers.get("stanza", None)


-def prepare_and_analyze(nlp, recognizer, text, ents):
-    nlp.load()
+def prepare_and_analyze(nlp, recognizer, text, entities):
     nlp_artifacts = nlp.process_text(text, "en")
-    results = recognizer.analyze(text, ents, nlp_artifacts)
+    results = recognizer.analyze(text, entities, nlp_artifacts)
     return results


Expand Down
1 change: 0 additions & 1 deletion presidio-analyzer/tests/test_transformers_recognizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def nlp_engine(nlp_engines):


 def prepare_and_analyze(nlp, recognizer, text, entities):
-    nlp.load()
     nlp_artifacts = nlp.process_text(text, "en")
     results = recognizer.analyze(text, entities, nlp_artifacts)
     return results
Expand Down

0 comments on commit 9696b9e

Please sign in to comment.