microsoft · SharonHart · May 12, 2024 · May 9, 2024 · May 9, 2024 · May 9, 2024
diff --git a/.pipelines/templates/build-python.yml b/.pipelines/templates/build-python.yml
@@ -3,14 +3,6 @@ parameters:
     - name: WORKING_FOLDER
 
 steps:
-    - task: Bash@3
-      displayName: 'Linting: ${{ parameters.SERVICE }}'
-      inputs:
-          targetType: 'inline'
-          workingDirectory: ${{ parameters.WORKING_FOLDER }}
-          script: |
-              set -eux  # fail on error
-              pipenv run flake8
 
     - task: Bash@3
       displayName: 'Unit tests: ${{ parameters.SERVICE }}'

diff --git a/.pipelines/templates/lint-build-test.yml b/.pipelines/templates/lint-build-test.yml
@@ -11,6 +11,21 @@ stages:
             steps:
                 - template: ./security-analysis.yml
 
+          - job: Linting
+            displayName: Linting
+            pool:
+              vmImage: 'ubuntu-latest'
+            steps:
+              - task: Bash@3
+                displayName: 'Linting: Presidio'  # job defines no matrix, so $(python.version) would not expand
+                inputs:
+                  targetType: 'inline'
+                  script: |
+                    set -eux  # fail on error, on unset variables, and echo each command
+                    pip install ruff
+                    ruff check
+
+
           - job: TestAnalyzer
             displayName: Test Analyzer
             pool:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,29 +1,7 @@
 repos:
-  -   repo: https://github.com/ambv/black
-      rev: 22.3.0
-      hooks:
-        - id: black
-          language_version: python3
-      exclude: ^tests/
-  -   repo: https://github.com/pycqa/flake8
-      rev: 3.9.0
-      hooks:
-        - id: flake8
-          additional_dependencies: [
-              'pep8-naming',
-              'flake8-docstrings',
-          ]
-          args: ['--max-line-length=88', 
-                 '--docstring-convention=numpy',
-                  # 'PEP8 Rules' to ignore in tests. Ignore documentation rules for all tests
-                  # and ignore long lines / whitespaces for e2e-tests where we define jsons in-code.
-                 '--per-file-ignores=**/tests/**.py:D docs/**.py:D e2e-tests/**.py:D,E501,W291,W293 docs/samples/deployments/spark/notebooks/*.py:E501,F821,D103',
-                 '--extend-ignore=
-                 		E203,
-                 		D100,
-                 		D202,
-                 		ANN101,
-                 		ANN102,
-                 		ANN204,
-                 		ANN203'
-                 		]
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.3
+    hooks:
+      - id: ruff  # linter hook; keep it before ruff-format, since --fix can rewrite code that then needs formatting
+        args: [ --fix ]
+      - id: ruff-format  # formatter hook
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -38,7 +38,7 @@ To get started, refer to the documentation for [setting up a development environ
 
 ### How to test?
 
-For Python, Presidio leverages `pytest` and `flake8`. See [this tutorial](docs/development.md#testing) on more information on testing presidio modules.
+For Python, Presidio leverages `pytest` and `ruff`. See [this tutorial](docs/development.md#testing) for more information on testing Presidio modules.
 
 ### Adding new recognizers for new PII types
 

diff --git a/docs/development.md b/docs/development.md
@@ -56,7 +56,7 @@ Follow these steps when starting to work on a Presidio service with Pipenv:
 
 4. To run arbitrary scripts within the virtual env, start the command with
     `pipenv run`. For example:
-    1. `pipenv run flake8`
+    1. `pipenv run ruff check`
     2. `pipenv run pip freeze`
     3. `pipenv run python -m spacy download en_core_web_lg`
 
@@ -233,39 +233,36 @@ run.bat
 
 ### Linting
 
-Presidio services are PEP8 compliant and continuously enforced on style guide issues during the build process using `flake8`.
+Presidio services are PEP8 compliant, and style guide issues are continuously checked during the build process using `ruff`, which implements the rules of `flake8` and other linters.
 
-Running flake8 locally, using `pipenv run flake8`, you can check for those issues prior to committing a change.
+Running ruff locally, using `pipenv run ruff check`, you can check for those issues prior to committing a change.
 
-In addition to the basic `flake8` functionality, Presidio uses the following extensions:
+In addition to the basic `flake8` functionality, Presidio uses the following linters as part of ruff:
 
 - _pep8-naming_: To check that variable names are PEP8 compliant.
 - _flake8-docstrings_: To check that docstrings are compliant.
 
 ### Automatically format code and check for code styling
 
-To make the linting process easier, you can use pre-commit hooks to verify and automatically format code upon a git commit, using `black`:
+To make the linting process easier, you can use pre-commit hooks to verify and automatically format code upon a git commit, using `ruff-format`:
 
 1. [Install pre-commit package manager locally.](https://pre-commit.com/#install)
 
 2. From the project's root, enable pre-commit, installing git hooks in the `.git/` directory by running: `pre-commit install`.
 
 3. Commit non PEP8 compliant code will cause commit failure and automatically
-    format your code using `black`, as well as checking code formatting using `flake8`
+    format your code using `ruff-format`, as well as checking code formatting using `ruff`
 
-        ```sh
-        >git commit -m 'autoformat' presidio-analyzer/presidio_analyzer/predefined_recognizers/us_ssn_recognizer.py
-
-        black....................................................................Failed
-        - hook id: black
-        - files were modified by this hook
-
-        reformatted presidio-analyzer/presidio_analyzer/predefined_recognizers/us_ssn_recognizer.py
-        All done!
-        1 file reformatted.
-
-        flake8...................................................................Passed
-
-        ```
+```sh
+[INFO] Initializing environment for https://github.com/astral-sh/ruff-pre-commit.
+[INFO] Installing environment for https://github.com/astral-sh/ruff-pre-commit.
+[INFO] Once installed this environment will be reused.
+[INFO] This may take a few minutes...
+ruff.....................................................................Passed
+ruff-format..............................................................Failed
+- hook id: ruff-format
+- files were modified by this hook
+  5 files reformatted, 4 files left unchanged
+```
 
 4. Committing again will finish successfully, with a well-formatted code.
diff --git a/presidio-analyzer/Pipfile b/presidio-analyzer/Pipfile
@@ -20,8 +20,6 @@ azure-core = "*"
 [dev-packages]
 pytest = "*"
 pytest-mock = "*"
-flake8= {version = ">=3.7.9"}
-pep8-naming = "*"
-flake8-docstrings = "*"
+ruff = "*"
 pre_commit = "*"
 python-dotenv = "*"
diff --git a/presidio-analyzer/presidio_analyzer/analyzer_engine.py b/presidio-analyzer/presidio_analyzer/analyzer_engine.py
@@ -187,7 +187,7 @@ def analyze(
         >>> results = analyzer.analyze(text='My phone number is 212-555-5555', entities=['PHONE_NUMBER'], language='en') # noqa D501
         >>> print(results)
         [type: PHONE_NUMBER, start: 19, end: 31, score: 0.85]
-        """
+        """  # noqa: E501
 
         all_fields = not entities
 

diff --git a/presidio-analyzer/presidio_analyzer/batch_analyzer_engine.py b/presidio-analyzer/presidio_analyzer/batch_analyzer_engine.py
@@ -19,7 +19,6 @@ class BatchAnalyzerEngine:
     """
 
     def __init__(self, analyzer_engine: Optional[AnalyzerEngine] = None):
-
         self.analyzer_engine = analyzer_engine
         if not analyzer_engine:
             self.analyzer_engine = AnalyzerEngine()
@@ -42,10 +41,10 @@ def analyze_iterator(
         texts = self._validate_types(texts)
 
         # Process the texts as batch for improved performance
-        nlp_artifacts_batch: Iterator[
-            Tuple[str, NlpArtifacts]
-        ] = self.analyzer_engine.nlp_engine.process_batch(
-            texts=texts, language=language
+        nlp_artifacts_batch: Iterator[Tuple[str, NlpArtifacts]] = (
+            self.analyzer_engine.nlp_engine.process_batch(
+                texts=texts, language=language
+            )
         )
 
         list_results = []
@@ -127,7 +126,7 @@ def analyze_dict(
     @staticmethod
     def _validate_types(value_iterator: Iterable[Any]) -> Iterator[Any]:
         for val in value_iterator:
-            if val and not type(val) in (int, float, bool, str):
+            if val and type(val) not in (int, float, bool, str):
                 err_msg = (
                     "Analyzer.analyze_iterator only works "
                     "on primitive types (int, float, bool, str). "

diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py
@@ -27,7 +27,8 @@ def __init__(
         azure_ai_endpoint: Optional[str] = None,
     ):
         """
-        Wrapper for the PII detection in Azure AI Language
+        Wrap the PII detection in Azure AI Language.
+
         :param supported_entities: List of supported entities for this recognizer.
         If None, all supported entities will be used.
         :param supported_language: Language code to use for the recognizer.
@@ -36,8 +37,8 @@ def __init__(
         :param azure_ai_key: Azure AI for language key
         :param azure_ai_endpoint: Azure AI for language endpoint
 
-        For more info, see https://learn.microsoft.com/en-us/azure/ai-services/language-service/personally-identifiable-information/overview # noqa
-        """
+        For more info, see https://learn.microsoft.com/en-us/azure/ai-services/language-service/personally-identifiable-information/overview
+        """  # noqa: E501
 
         super().__init__(
             supported_entities=supported_entities,
@@ -73,7 +74,7 @@ def get_supported_entities(self) -> List[str]:
     @staticmethod
     def __get_azure_ai_supported_entities() -> List[str]:
         """Return the list of all supported entities for Azure AI Language."""
-        from azure.ai.textanalytics._models import PiiEntityCategory # noqa
+        from azure.ai.textanalytics._models import PiiEntityCategory  # noqa
 
         return [r.value.upper() for r in PiiEntityCategory]
 

diff --git a/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizer_registry.py b/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizer_registry.py
@@ -334,9 +334,9 @@ def add_pattern_recognizer_from_dict(self, recognizer_dict: Dict) -> None:
 
         :example:
         >>> registry = RecognizerRegistry()
-        >>> recognizer = { "name": "Titles Recognizer", "supported_language": "de","supported_entity": "TITLE", "deny_list": ["Mr.","Mrs."]} # noqa: E501
+        >>> recognizer = { "name": "Titles Recognizer", "supported_language": "de","supported_entity": "TITLE", "deny_list": ["Mr.","Mrs."]}
         >>> registry.add_pattern_recognizer_from_dict(recognizer)
-        """
+        """  # noqa: E501
 
         recognizer = PatternRecognizer.from_dict(recognizer_dict)
         self.add_recognizer(recognizer)

diff --git a/presidio-analyzer/setup.cfg b/presidio-analyzer/setup.cfg
diff --git a/presidio-analyzer/setup.py b/presidio-analyzer/setup.py
@@ -1,4 +1,5 @@
 """Setup.py for Presidio Analyzer."""
+
 import os.path
 from os import path
 
@@ -27,7 +28,7 @@
         "presidio_analyzer": ["py.typed", "conf/*"],
     },
     trusted_host=["pypi.org"],
-    tests_require=["pytest", "flake8>=3.7.9"],
+    tests_require=["pytest", "ruff"],
     install_requires=[
         "spacy>=3.4.4, <4.0.0",
         "regex",

diff --git a/presidio-anonymizer/Pipfile b/presidio-anonymizer/Pipfile
@@ -9,7 +9,5 @@ pycryptodome = ">=3.10,<4.0.0"
 
 [dev-packages]
 pytest = "*"
-flake8 = { version = ">=3.7.9" }
-pep8-naming = "*"
-flake8-docstrings = "*"
+ruff = "*"
 pre_commit = "*"
diff --git a/presidio-anonymizer/setup.cfg b/presidio-anonymizer/setup.cfg
diff --git a/presidio-anonymizer/setup.py b/presidio-anonymizer/setup.py
@@ -5,7 +5,7 @@
 
 from setuptools import setup, find_packages
 
-test_requirements = ["pytest>=3", "flake8==3.7.9"]
+test_requirements = ["pytest>=3", "ruff"]
 
 __version__ = ""
 this_directory = path.abspath(path.dirname(__file__))

diff --git a/presidio-cli/Pipfile b/presidio-cli/Pipfile
@@ -5,7 +5,7 @@ pathspec = "*"
 
 [dev-packages]
 pytest = ">=6"
-flake8= {version = ">=3.7"}
+ruff = "*"
 pre_commit = ">=2"
 pytest-cov = "*"
 pytest-mock = "*"
@@ -0,0 +1,6 @@
+[tool.ruff]
+line-length = 120  # maximum line length ruff is configured with for this package
+
+# Rule codes ignored for now; the violations behind them are still to be fixed.
+[tool.ruff.lint]
+ignore = ["D205", "D400", "E721"]
diff --git a/presidio-cli/setup.cfg b/presidio-cli/setup.cfg
@@ -1,12 +1,6 @@
 [bdist_wheel]
 universal = 1
 
-[flake8]
-import-order-style = pep8
-application-import-names = presidio_cli
-ignore = E203,W503
-max-line-length = 120
-
 [build_sphinx]
 all-files = 1
 source-dir = docs

diff --git a/presidio-cli/setup.py b/presidio-cli/setup.py
@@ -31,5 +31,5 @@
     ],
     install_requires=["presidio-analyzer>=2.2", "pyyaml", "pathspec"],
     trusted_host=["pypi.org"],
-    tests_require=["pytest", "flake8>=3.7.9"],
+    tests_require=["pytest", "ruff"],
 )
diff --git a/presidio-image-redactor/Pipfile b/presidio-image-redactor/Pipfile
@@ -19,6 +19,4 @@ azure-ai-formrecognizer = ">=3.3.0,<4.0.0"
 [dev-packages]
 pytest = "*"
 pytest-mock = "*"
-flake8 = { version = ">=3.9.2" }
-pep8-naming = "*"
-flake8-docstrings = "*"
+ruff = "*"
@@ -0,0 +1,6 @@
+[tool.ruff]
+line-length = 120  # maximum line length ruff is configured with for this package
+
+# Rule codes ignored for now; the violations behind them are still to be fixed.
+[tool.ruff.lint]
+ignore = ["F401", "F841", "E402", "E721", "F541", "E712", "F811", "E722"]
diff --git a/presidio-image-redactor/setup.cfg b/presidio-image-redactor/setup.cfg
diff --git a/presidio-image-redactor/setup.py b/presidio-image-redactor/setup.py
@@ -1,4 +1,5 @@
 """Setup.py for Presidio Image Redactor."""
+
 import os.path
 from os import path
 
@@ -12,10 +13,10 @@
     "pydicom>=2.3.0",
     "pypng>=0.20220715.0",
     "azure-ai-formrecognizer>=3.3.0,<4.0.0",
-    "opencv-python>=4.0.0,<5.0.0"
+    "opencv-python>=4.0.0,<5.0.0",
 ]
 
-test_requirements = ["pytest>=3", "pytest-mock>=3.10.0", "flake8>=3.7.9"]
+test_requirements = ["pytest>=3", "pytest-mock>=3.10.0", "ruff"]
 
 __version__ = ""
 this_directory = path.abspath(path.dirname(__file__))

diff --git a/presidio-structured/Pipfile b/presidio-structured/Pipfile
@@ -11,7 +11,5 @@ pandas = ">=1.5.2"
 
 [dev-packages]
 pytest = "*"
-flake8 = { version = ">=3.7.9" }
-pep8-naming = "*"
-flake8-docstrings = "*"
+ruff = "*"
 pre_commit = "*"
@@ -0,0 +1,6 @@
+[tool.ruff]
+line-length = 120  # maximum line length ruff is configured with for this package
+
+# Rule code ignored for now; the violations behind it are still to be fixed.
+[tool.ruff.lint]
+ignore = ["F841"]