diff --git a/.pipelines/templates/build-python.yml b/.pipelines/templates/build-python.yml index 03397075d..e45c2c134 100644 --- a/.pipelines/templates/build-python.yml +++ b/.pipelines/templates/build-python.yml @@ -3,14 +3,6 @@ parameters: - name: WORKING_FOLDER steps: - - task: Bash@3 - displayName: 'Linting: ${{ parameters.SERVICE }}' - inputs: - targetType: 'inline' - workingDirectory: ${{ parameters.WORKING_FOLDER }} - script: | - set -eux # fail on error - pipenv run flake8 - task: Bash@3 displayName: 'Unit tests: ${{ parameters.SERVICE }}' diff --git a/.pipelines/templates/lint-build-test.yml b/.pipelines/templates/lint-build-test.yml index d362273cf..7b325df83 100644 --- a/.pipelines/templates/lint-build-test.yml +++ b/.pipelines/templates/lint-build-test.yml @@ -11,6 +11,21 @@ stages: steps: - template: ./security-analysis.yml + - job: Linting + displayName: Linting + pool: + vmImage: 'ubuntu-latest' + steps: + - task: Bash@3 + displayName: 'Linting: Presidio for $(python.version)' + inputs: + targetType: 'inline' + script: | + set -eux # fail on error + pip install ruff + ruff check + + - job: TestAnalyzer displayName: Test Analyzer pool: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d110d9f99..93729503f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,29 +1,7 @@ repos: - - repo: https://github.com/ambv/black - rev: 22.3.0 - hooks: - - id: black - language_version: python3 - exclude: ^tests/ - - repo: https://github.com/pycqa/flake8 - rev: 3.9.0 - hooks: - - id: flake8 - additional_dependencies: [ - 'pep8-naming', - 'flake8-docstrings', - ] - args: ['--max-line-length=88', - '--docstring-convention=numpy', - # 'PEP8 Rules' to ignore in tests. Ignore documentation rules for all tests - # and ignore long lines / whitespaces for e2e-tests where we define jsons in-code. 
- '--per-file-ignores=**/tests/**.py:D docs/**.py:D e2e-tests/**.py:D,E501,W291,W293 docs/samples/deployments/spark/notebooks/*.py:E501,F821,D103', - '--extend-ignore= - E203, - D100, - D202, - ANN101, - ANN102, - ANN204, - ANN203' - ] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.3 + hooks: + - id: ruff + args: [ --fix ] + - id: ruff-format \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9e9b32c7d..37849776c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -38,7 +38,7 @@ To get started, refer to the documentation for [setting up a development environ ### How to test? -For Python, Presidio leverages `pytest` and `flake8`. See [this tutorial](docs/development.md#testing) on more information on testing presidio modules. +For Python, Presidio leverages `pytest` and `ruff`. See [this tutorial](docs/development.md#testing) for more information on testing presidio modules. ### Adding new recognizers for new PII types diff --git a/docs/development.md b/docs/development.md index 093d53841..9eff2dfb7 100644 --- a/docs/development.md +++ b/docs/development.md @@ -56,7 +56,7 @@ Follow these steps when starting to work on a Presidio service with Pipenv: 4. To run arbitrary scripts within the virtual env, start the command with `pipenv run`. For example: - 1. `pipenv run flake8` + 1. `pipenv run ruff check` 2. `pipenv run pip freeze` 3. `pipenv run python -m spacy download en_core_web_lg` @@ -233,39 +233,36 @@ run.bat ### Linting -Presidio services are PEP8 compliant and continuously enforced on style guide issues during the build process using `flake8`. +Presidio services are PEP8 compliant, and style guide issues are continuously enforced during the build process using `ruff`, which implements the rules of `flake8` and other linters. -Running flake8 locally, using `pipenv run flake8`, you can check for those issues prior to committing a change. 
+Run ruff locally, using `pipenv run ruff check`, to check for those issues prior to committing a change. -In addition to the basic `flake8` functionality, Presidio uses the following extensions: +In addition to the basic `flake8` rules, Presidio enables ruff's implementations of the following linters: - _pep8-naming_: To check that variable names are PEP8 compliant. - _flake8-docstrings_: To check that docstrings are compliant. ### Automatically format code and check for code styling -To make the linting process easier, you can use pre-commit hooks to verify and automatically format code upon a git commit, using `black`: +To make the linting process easier, you can use pre-commit hooks to verify and automatically format code upon a git commit, using `ruff-format`: 1. [Install pre-commit package manager locally.](https://pre-commit.com/#install) 2. From the project's root, enable pre-commit, installing git hooks in the `.git/` directory by running: `pre-commit install`. 3. Commit non PEP8 compliant code will cause commit failure and automatically - format your code using `black`, as well as checking code formatting using `flake8` + format your code using `ruff-format`, as well as checking code formatting using `ruff` - ```sh - >git commit -m 'autoformat' presidio-analyzer/presidio_analyzer/predefined_recognizers/us_ssn_recognizer.py - - black....................................................................Failed - - hook id: black - - files were modified by this hook - - reformatted presidio-analyzer/presidio_analyzer/predefined_recognizers/us_ssn_recognizer.py - All done! - 1 file reformatted. - - flake8...................................................................Passed - - ``` +```sh +[INFO] Initializing environment for https://github.com/astral-sh/ruff-pre-commit. +[INFO] Installing environment for https://github.com/astral-sh/ruff-pre-commit. +[INFO] Once installed this environment will be reused. +[INFO] This may take a few minutes... 
+ruff.....................................................................Passed +ruff-format..............................................................Failed +- hook id: ruff-format +- files were modified by this hook + 5 files reformatted, 4 files left unchanged +``` 4. Committing again will finish successfully, with a well-formatted code. diff --git a/presidio-analyzer/Pipfile b/presidio-analyzer/Pipfile index e2c284e5a..273cbb510 100644 --- a/presidio-analyzer/Pipfile +++ b/presidio-analyzer/Pipfile @@ -20,8 +20,6 @@ azure-core = "*" [dev-packages] pytest = "*" pytest-mock = "*" -flake8= {version = ">=3.7.9"} -pep8-naming = "*" -flake8-docstrings = "*" +ruff = "*" pre_commit = "*" python-dotenv = "*" diff --git a/presidio-analyzer/presidio_analyzer/analyzer_engine.py b/presidio-analyzer/presidio_analyzer/analyzer_engine.py index 6d6450ac9..8c6555f88 100644 --- a/presidio-analyzer/presidio_analyzer/analyzer_engine.py +++ b/presidio-analyzer/presidio_analyzer/analyzer_engine.py @@ -187,7 +187,7 @@ def analyze( >>> results = analyzer.analyze(text='My phone number is 212-555-5555', entities=['PHONE_NUMBER'], language='en') # noqa D501 >>> print(results) [type: PHONE_NUMBER, start: 19, end: 31, score: 0.85] - """ + """ # noqa: E501 all_fields = not entities diff --git a/presidio-analyzer/presidio_analyzer/batch_analyzer_engine.py b/presidio-analyzer/presidio_analyzer/batch_analyzer_engine.py index 4a428595d..0569e3967 100644 --- a/presidio-analyzer/presidio_analyzer/batch_analyzer_engine.py +++ b/presidio-analyzer/presidio_analyzer/batch_analyzer_engine.py @@ -19,7 +19,6 @@ class BatchAnalyzerEngine: """ def __init__(self, analyzer_engine: Optional[AnalyzerEngine] = None): - self.analyzer_engine = analyzer_engine if not analyzer_engine: self.analyzer_engine = AnalyzerEngine() @@ -42,10 +41,10 @@ def analyze_iterator( texts = self._validate_types(texts) # Process the texts as batch for improved performance - nlp_artifacts_batch: Iterator[ - Tuple[str, 
NlpArtifacts] - ] = self.analyzer_engine.nlp_engine.process_batch( - texts=texts, language=language + nlp_artifacts_batch: Iterator[Tuple[str, NlpArtifacts]] = ( + self.analyzer_engine.nlp_engine.process_batch( + texts=texts, language=language + ) ) list_results = [] @@ -127,7 +126,7 @@ def analyze_dict( @staticmethod def _validate_types(value_iterator: Iterable[Any]) -> Iterator[Any]: for val in value_iterator: - if val and not type(val) in (int, float, bool, str): + if val and type(val) not in (int, float, bool, str): err_msg = ( "Analyzer.analyze_iterator only works " "on primitive types (int, float, bool, str). " diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py index d57a897f0..335077ad1 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py @@ -27,7 +27,8 @@ def __init__( azure_ai_endpoint: Optional[str] = None, ): """ - Wrapper for the PII detection in Azure AI Language + Wrap the PII detection in Azure AI Language. + :param supported_entities: List of supported entities for this recognizer. If None, all supported entities will be used. :param supported_language: Language code to use for the recognizer. 
@@ -36,8 +37,8 @@ def __init__( :param azure_ai_key: Azure AI for language key :param azure_ai_endpoint: Azure AI for language endpoint - For more info, see https://learn.microsoft.com/en-us/azure/ai-services/language-service/personally-identifiable-information/overview # noqa - """ + For more info, see https://learn.microsoft.com/en-us/azure/ai-services/language-service/personally-identifiable-information/overview + """ # noqa E501 super().__init__( supported_entities=supported_entities, @@ -73,7 +74,7 @@ def get_supported_entities(self) -> List[str]: @staticmethod def __get_azure_ai_supported_entities() -> List[str]: """Return the list of all supported entities for Azure AI Language.""" - from azure.ai.textanalytics._models import PiiEntityCategory # noqa + from azure.ai.textanalytics._models import PiiEntityCategory # noqa return [r.value.upper() for r in PiiEntityCategory] diff --git a/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizer_registry.py b/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizer_registry.py index 347ef7a1b..04bd3558e 100644 --- a/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizer_registry.py +++ b/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizer_registry.py @@ -334,9 +334,9 @@ def add_pattern_recognizer_from_dict(self, recognizer_dict: Dict) -> None: :example: >>> registry = RecognizerRegistry() - >>> recognizer = { "name": "Titles Recognizer", "supported_language": "de","supported_entity": "TITLE", "deny_list": ["Mr.","Mrs."]} # noqa: E501 + >>> recognizer = { "name": "Titles Recognizer", "supported_language": "de","supported_entity": "TITLE", "deny_list": ["Mr.","Mrs."]} >>> registry.add_pattern_recognizer_from_dict(recognizer) - """ + """ # noqa: E501 recognizer = PatternRecognizer.from_dict(recognizer_dict) self.add_recognizer(recognizer) diff --git a/presidio-analyzer/setup.cfg b/presidio-analyzer/setup.cfg deleted file mode 100644 index 732559f8e..000000000 --- 
a/presidio-analyzer/setup.cfg +++ /dev/null @@ -1,10 +0,0 @@ -[flake8] -max-line-length = 88 -exclude = - .git, - __pycache__, - build, - dist, - tests -docstring-convention = numpy -extend-ignore = E203 D100 D202 ANN101 ANN102 ANN204 ANN203 TC \ No newline at end of file diff --git a/presidio-analyzer/setup.py b/presidio-analyzer/setup.py index 8d22913a6..1f418fa7d 100644 --- a/presidio-analyzer/setup.py +++ b/presidio-analyzer/setup.py @@ -1,4 +1,5 @@ """Setup.py for Presidio Analyzer.""" + import os.path from os import path @@ -27,7 +28,7 @@ "presidio_analyzer": ["py.typed", "conf/*"], }, trusted_host=["pypi.org"], - tests_require=["pytest", "flake8>=3.7.9"], + tests_require=["pytest", "ruff"], install_requires=[ "spacy>=3.4.4, <4.0.0", "regex", diff --git a/presidio-anonymizer/Pipfile b/presidio-anonymizer/Pipfile index 5faf4dbb0..62fb39e79 100644 --- a/presidio-anonymizer/Pipfile +++ b/presidio-anonymizer/Pipfile @@ -9,7 +9,5 @@ pycryptodome = ">=3.10,<4.0.0" [dev-packages] pytest = "*" -flake8 = { version = ">=3.7.9" } -pep8-naming = "*" -flake8-docstrings = "*" +ruff = "*" pre_commit = "*" diff --git a/presidio-anonymizer/setup.cfg b/presidio-anonymizer/setup.cfg deleted file mode 100644 index 8fe2404bd..000000000 --- a/presidio-anonymizer/setup.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[flake8] -max-line-length = 88 -docstring-convention = numpy -per-file-ignores = - tests/*: D -extend-ignore = E203 D100 D202 ANN101 ANN102 ANN204 ANN203 \ No newline at end of file diff --git a/presidio-anonymizer/setup.py b/presidio-anonymizer/setup.py index 4083a0a8f..1b166e768 100644 --- a/presidio-anonymizer/setup.py +++ b/presidio-anonymizer/setup.py @@ -5,7 +5,7 @@ from setuptools import setup, find_packages -test_requirements = ["pytest>=3", "flake8==3.7.9"] +test_requirements = ["pytest>=3", "ruff"] __version__ = "" this_directory = path.abspath(path.dirname(__file__)) diff --git a/presidio-cli/Pipfile b/presidio-cli/Pipfile index 86304de0e..2a620ad56 100644 --- 
a/presidio-cli/Pipfile +++ b/presidio-cli/Pipfile @@ -5,7 +5,7 @@ pathspec = "*" [dev-packages] pytest = ">=6" -flake8= {version = ">=3.7"} +ruff = "*" pre_commit = ">=2" pytest-cov = "*" pytest-mock = "*" diff --git a/presidio-cli/pyproject.toml b/presidio-cli/pyproject.toml new file mode 100644 index 000000000..d382dcb36 --- /dev/null +++ b/presidio-cli/pyproject.toml @@ -0,0 +1,6 @@ +[tool.ruff] +line-length = 120 + +# To be fixed: +[tool.ruff.lint] +ignore = ["D205", "D400", "E721"] \ No newline at end of file diff --git a/presidio-cli/setup.cfg b/presidio-cli/setup.cfg index 9669d359d..0c44ae3c9 100644 --- a/presidio-cli/setup.cfg +++ b/presidio-cli/setup.cfg @@ -1,12 +1,6 @@ [bdist_wheel] universal = 1 -[flake8] -import-order-style = pep8 -application-import-names = presidio_cli -ignore = E203,W503 -max-line-length = 120 - [build_sphinx] all-files = 1 source-dir = docs diff --git a/presidio-cli/setup.py b/presidio-cli/setup.py index 2b8458ac2..a92bf4f9d 100644 --- a/presidio-cli/setup.py +++ b/presidio-cli/setup.py @@ -31,5 +31,5 @@ ], install_requires=["presidio-analyzer>=2.2", "pyyaml", "pathspec"], trusted_host=["pypi.org"], - tests_require=["pytest", "flake8>=3.7.9"], + tests_require=["pytest", "ruff"], ) diff --git a/presidio-image-redactor/Pipfile b/presidio-image-redactor/Pipfile index 6be656a4b..c096df40d 100644 --- a/presidio-image-redactor/Pipfile +++ b/presidio-image-redactor/Pipfile @@ -19,6 +19,4 @@ azure-ai-formrecognizer = ">=3.3.0,<4.0.0" [dev-packages] pytest = "*" pytest-mock = "*" -flake8 = { version = ">=3.9.2" } -pep8-naming = "*" -flake8-docstrings = "*" +ruff = "*" diff --git a/presidio-image-redactor/pyproject.toml b/presidio-image-redactor/pyproject.toml new file mode 100644 index 000000000..c23a466da --- /dev/null +++ b/presidio-image-redactor/pyproject.toml @@ -0,0 +1,6 @@ +[tool.ruff] +line-length = 120 + +# To be fixed: +[tool.ruff.lint] +ignore = ["F401", "F841", "E402", "E721", "F541", "E712", "F811", "E722"] \ No newline at end 
of file diff --git a/presidio-image-redactor/setup.cfg b/presidio-image-redactor/setup.cfg deleted file mode 100644 index 2afee04eb..000000000 --- a/presidio-image-redactor/setup.cfg +++ /dev/null @@ -1,10 +0,0 @@ -[flake8] -max-line-length = 88 -exclude = - .git, - __pycache__, - build, - dist, - tests -docstring-convention = numpy -extend-ignore = E203 D100 D202 D407 ANN101 ANN102 ANN204 ANN203 \ No newline at end of file diff --git a/presidio-image-redactor/setup.py b/presidio-image-redactor/setup.py index a671efa9a..06017e3d0 100644 --- a/presidio-image-redactor/setup.py +++ b/presidio-image-redactor/setup.py @@ -1,4 +1,5 @@ """Setup.py for Presidio Image Redactor.""" + import os.path from os import path @@ -12,10 +13,10 @@ "pydicom>=2.3.0", "pypng>=0.20220715.0", "azure-ai-formrecognizer>=3.3.0,<4.0.0", - "opencv-python>=4.0.0,<5.0.0" + "opencv-python>=4.0.0,<5.0.0", ] -test_requirements = ["pytest>=3", "pytest-mock>=3.10.0", "flake8>=3.7.9"] +test_requirements = ["pytest>=3", "pytest-mock>=3.10.0", "ruff"] __version__ = "" this_directory = path.abspath(path.dirname(__file__)) diff --git a/presidio-structured/Pipfile b/presidio-structured/Pipfile index 4205f8b63..1a21a0f57 100644 --- a/presidio-structured/Pipfile +++ b/presidio-structured/Pipfile @@ -11,7 +11,5 @@ pandas = ">=1.5.2" [dev-packages] pytest = "*" -flake8 = { version = ">=3.7.9" } -pep8-naming = "*" -flake8-docstrings = "*" +ruff = "*" pre_commit = "*" diff --git a/presidio-structured/pyproject.toml b/presidio-structured/pyproject.toml new file mode 100644 index 000000000..29520ad59 --- /dev/null +++ b/presidio-structured/pyproject.toml @@ -0,0 +1,6 @@ +[tool.ruff] +line-length = 120 + +# To be fixed: +[tool.ruff.lint] +ignore = ["F841"] diff --git a/presidio-structured/setup.cfg b/presidio-structured/setup.cfg deleted file mode 100644 index 732559f8e..000000000 --- a/presidio-structured/setup.cfg +++ /dev/null @@ -1,10 +0,0 @@ -[flake8] -max-line-length = 88 -exclude = - .git, - __pycache__, - 
build, - dist, - tests -docstring-convention = numpy -extend-ignore = E203 D100 D202 ANN101 ANN102 ANN204 ANN203 TC \ No newline at end of file diff --git a/presidio-structured/setup.py b/presidio-structured/setup.py index 93eeb41db..aaf047c92 100644 --- a/presidio-structured/setup.py +++ b/presidio-structured/setup.py @@ -5,7 +5,7 @@ from setuptools import setup, find_packages -test_requirements = ["pytest>=3", "flake8==3.7.9"] +test_requirements = ["pytest>=3", "ruff"] __version__ = "" this_directory = path.abspath(path.dirname(__file__)) @@ -15,9 +15,7 @@ long_description = f.read() try: - with open( - os.path.join(parent_directory, "PRESIDIO-STRUCTURED-VERSION") - ) as version_file: + with open(os.path.join(parent_directory, "PRESIDIO-STRUCTURED-VERSION")) as version_file: __version__ = version_file.read().strip() except Exception: __version__ = os.environ.get("PRESIDIO_STRUCTURED_VERSION", "0.0.1-alpha") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..f9524ba3a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,79 @@ +[tool.ruff] + +exclude = [ + # Ruff recommended: + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", + + # Project specific: + "docs/samples", + "e2e-tests/", + "*/tests/*" +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +[tool.ruff.lint] +select = ["E", "F", "I", "D", "N", "W", + # To be added: + # "SIM", "UP", "ANN", "B" +] +ignore = ["E203", "D100", "D202", "D407", "ANN101", "ANN102", "ANN204", + # To be fixed: + "I001", "E721", "N818"] + +fixable = ["ALL"] + +## Allow unused variables when underscore-prefixed. 
+#dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + + +## 3. Avoid trying to fix flake8-bugbear (`B`) violations. +#unfixable = ["B"] + +## 4. Ignore `E402` (import violations) in all `__init__.py` files, and in select subdirectories. +#[tool.ruff.lint.per-file-ignores] +#"__init__.py" = ["E402"] +#"**/{tests,docs,tools}/*" = ["E402"] + +[tool.ruff.lint.pydocstyle] +convention = "numpy"