Skip to content

Commit

Permalink
DICOM redactor improvement: Enable return of redacted bboxes (#1111)
Browse files Browse the repository at this point in the history
* Enable return of bboxes used to redact pixels

* Adding return_bboxes arg values into existing tests

* Adding test for return_bbox==True condition

* Adding test for _save_bbox_json()

* Making argument name more clear

* Creating separate method to return redacted image and bboxes

* Linting fix

* Removing Union return type

* Commenting out DICOM verification engine integration test to see if that is still the cause of unit test hangup

* Renaming test and removing redundancy in unit test for dicom image redactor

* Fixing duplication of call to a single file likely from main merges

* Removing extra cases for redact() test

* Changing mocked return type from None to an empty list

* Commenting out full unit test for redact to see effect on PR build hangup

* Reintroduce verify integration test and non-parameterized redact test

* Commenting out threshold and expected length test to see impact on PR build hang-up

* Undo comment out of image analyzer engine test

* Commenting out all unit tests for dicom image redactor engine

* Comment out unit test for redact()

* Fixing typing

* Commenting out exception test for redact_and_return_bbox

* Updated how exceptions are handled for redact_and_return_bbox, return all unit tests

* Adding IsADirectoryError exception type

* Commenting out happy path test for redact_and_return_bbox

* Commenting out compressed and icon_image_sequence DICOM test input images for redact_and_return_bbox happy path test

* Commenting out the type assertions in happy path test for redact_and_return_bbox

* Commenting out the call count assertions in happy path for redact_and_return_bbox

* Update type assertion and comment out all mocking and mocking assertions for happy path test for redact_and_return_bbox

* Commenting out all assertions in happy path test for redact_and_return_bbox

* Replacing mocker.patch with mocker.patch.object for all mocked methods in happy path test for redact_and_return_bbox

* Changing all mocker.patch.object calls into mocker.patch for happy path test for redact_and_return_bbox

* Reintroduce assertions for happy path test for redact_and_return_bbox

* Turning off assertions for call count again for happy path for redact_and_return_bbox

* Making assertion for returned bbox type even more explicit for happy path test for redact_and_return_bbox

* Turning off type assertions and turning on mock call count assertions for happy path test for redact_and_return_bbox

* Replacing call count assertions with assert_called_once

* Reintroducing type assertions and changing return_value to include some placeholder mock data instead of being empty dictionaries in list

* Comment out the image type assertion

* Turning on image type assertion and turning off bbox type assertions

* Removing assertion for dict

* Using isinstance instead of type ==

* Removing assertions for bbox type

---------

Co-authored-by: Omri Mendels <[email protected]>
  • Loading branch information
niwilso and omri374 authored Aug 2, 2023
1 parent 67833d5 commit e323fed
Show file tree
Hide file tree
Showing 2 changed files with 247 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
from pydicom.pixel_data_handlers.util import apply_voi_lut
import PIL
import png
import json
import numpy as np
from matplotlib import pyplot as plt # necessary import for PIL typing # noqa: F401
from typing import Tuple, List, Union, Optional
from typing import Tuple, List, Dict, Union, Optional

from presidio_image_redactor import ImageRedactorEngine
from presidio_image_redactor import ImageAnalyzerEngine # noqa: F401
Expand All @@ -24,16 +25,16 @@ class DicomImageRedactorEngine(ImageRedactorEngine):
:param image_analyzer_engine: Engine which performs OCR + PII detection.
"""

def redact(
def redact_and_return_bbox(
self,
image: pydicom.dataset.FileDataset,
fill: str = "contrast",
padding_width: int = 25,
crop_ratio: float = 0.75,
ocr_kwargs: Optional[dict] = None,
**text_analyzer_kwargs,
):
"""Redact method to redact the given DICOM image.
) -> Tuple[pydicom.dataset.FileDataset, List[Dict[str, int]]]:
"""Redact method to redact the given DICOM image and return redacted bboxes.
Please note, this method duplicates the image, creates a
new instance and manipulates it.
Expand All @@ -54,8 +55,12 @@ def redact(
raise TypeError("The provided image must be a loaded DICOM instance.")
try:
image.PixelData
except AttributeError:
raise AttributeError("Provided DICOM instance lacks pixel data.")
except AttributeError as e:
raise AttributeError(f"Provided DICOM instance lacks pixel data: {e}")
except PermissionError as e:
raise PermissionError(f"Unable to access pixel data (may not exist): {e}")
except IsADirectoryError as e:
raise IsADirectoryError(f"DICOM instance is a directory: {e}")

instance = deepcopy(image)

Expand Down Expand Up @@ -93,6 +98,42 @@ def redact(
)
redacted_image = self._add_redact_box(instance, bboxes, crop_ratio, fill)

return redacted_image, bboxes

def redact(
    self,
    image: pydicom.dataset.FileDataset,
    fill: str = "contrast",
    padding_width: int = 25,
    crop_ratio: float = 0.75,
    ocr_kwargs: Optional[dict] = None,
    **text_analyzer_kwargs,
) -> pydicom.dataset.FileDataset:
    """Redact the given DICOM image and return only the redacted instance.

    This is a thin wrapper around ``redact_and_return_bbox``: every argument
    is forwarded unchanged by keyword, and the bounding boxes that come back
    alongside the image are discarded.

    Please note, this method duplicates the image, creates a
    new instance and manipulates it.

    :param image: Loaded DICOM instance including pixel data and metadata.
    :param fill: Fill setting to use for redaction box ("contrast" or "background").
    :param padding_width: Padding width to use when running OCR.
    :param crop_ratio: Portion of image to consider when selecting
    most common pixel value as the background color value.
    :param ocr_kwargs: Additional params for OCR methods.
    :param text_analyzer_kwargs: Additional values for the analyze method
    in AnalyzerEngine.

    :return: DICOM instance with redacted pixel data.
    """
    image_and_bboxes = self.redact_and_return_bbox(
        image=image,
        fill=fill,
        padding_width=padding_width,
        crop_ratio=crop_ratio,
        ocr_kwargs=ocr_kwargs,
        **text_analyzer_kwargs,
    )

    # The delegate yields an (image, bboxes) pair; callers of redact()
    # only want the image, so the boxes are dropped here.
    redacted_instance, _discarded_bboxes = image_and_bboxes
    return redacted_instance

def redact_from_file(
Expand All @@ -102,6 +143,7 @@ def redact_from_file(
padding_width: int = 25,
crop_ratio: float = 0.75,
fill: str = "contrast",
save_bboxes: bool = False,
ocr_kwargs: Optional[dict] = None,
**text_analyzer_kwargs,
) -> None:
Expand All @@ -115,6 +157,7 @@ def redact_from_file(
:param padding_width : Padding width to use when running OCR.
:param fill: Color setting to use for redaction box
("contrast" or "background").
:param save_bboxes: True if we want to save bounding boxes.
:param ocr_kwargs: Additional params for OCR methods.
:param text_analyzer_kwargs: Additional values for the analyze method
in AnalyzerEngine.
Expand All @@ -140,6 +183,7 @@ def redact_from_file(
padding_width=padding_width,
overwrite=True,
dst_parent_dir=".",
save_bboxes=save_bboxes,
ocr_kwargs=ocr_kwargs,
**text_analyzer_kwargs,
)
Expand All @@ -155,6 +199,7 @@ def redact_from_directory(
padding_width: int = 25,
crop_ratio: float = 0.75,
fill: str = "contrast",
save_bboxes: bool = False,
ocr_kwargs: Optional[dict] = None,
**text_analyzer_kwargs,
) -> None:
Expand All @@ -170,6 +215,7 @@ def redact_from_directory(
most common pixel value as the background color value.
:param fill: Color setting to use for redaction box
("contrast" or "background").
:param save_bboxes: True if we want to save bounding boxes.
:param ocr_kwargs: Additional params for OCR methods.
:param text_analyzer_kwargs: Additional values for the analyze method
in AnalyzerEngine.
Expand All @@ -195,6 +241,7 @@ def redact_from_directory(
padding_width=padding_width,
overwrite=True,
dst_parent_dir=".",
save_bboxes=save_bboxes,
ocr_kwargs=ocr_kwargs,
**text_analyzer_kwargs,
)
Expand Down Expand Up @@ -516,7 +563,7 @@ def _copy_files_for_processing(src_path: str, dst_parent_dir: str) -> Path:
elif Path(src_path).is_file() is True:
# Create the output dir manually if working with a single file
os.makedirs(Path(dst_path).parent, exist_ok=True)
shutil.copy(src_path, dst_path)
shutil.copyfile(src_path, dst_path)
else:
raise FileNotFoundError(f"{src_path} does not exist")

Expand Down Expand Up @@ -811,6 +858,19 @@ def _add_redact_box(

return redacted_instance

@staticmethod
def _save_bbox_json(output_dcm_path: str, bboxes: List[Dict[str, int]]) -> None:
    """Write the redaction bounding boxes to a JSON file next to the DICOM file.

    The JSON file path is ``output_dcm_path`` with its final suffix replaced
    by ``.json``. The boxes are serialized with a 4-space indent.

    :param output_dcm_path: Path to the redacted DICOM file.
    :param bboxes: Bounding boxes used in redaction.
    """
    json_target = Path(output_dcm_path).with_suffix(".json")

    # Open before serializing so the file is created/truncated first,
    # then stream the JSON straight into it.
    with json_target.open("w") as sink:
        json.dump(bboxes, sink, indent=4)

def _redact_single_dicom_image(
self,
dcm_path: str,
Expand All @@ -819,6 +879,7 @@ def _redact_single_dicom_image(
padding_width: int,
overwrite: bool,
dst_parent_dir: str,
save_bboxes: bool,
ocr_kwargs: Optional[dict] = None,
**text_analyzer_kwargs,
) -> str:
Expand All @@ -833,6 +894,7 @@ def _redact_single_dicom_image(
:param overwrite: Only set to True if you are providing the
duplicated DICOM path in dcm_path.
:param dst_parent_dir: String path to parent directory of where to store copies.
:param save_bboxes: True if we want to save bounding boxes.
:param ocr_kwargs: Additional params for OCR methods.
:param text_analyzer_kwargs: Additional values for the analyze method
in AnalyzerEngine.
Expand Down Expand Up @@ -892,6 +954,10 @@ def _redact_single_dicom_image(
)
redacted_dicom_instance.save_as(dst_path)

# Save redacted bboxes
if save_bboxes:
self._save_bbox_json(dst_path, bboxes)

return dst_path

def _redact_multiple_dicom_images(
Expand All @@ -902,6 +968,7 @@ def _redact_multiple_dicom_images(
padding_width: int,
overwrite: bool,
dst_parent_dir: str,
save_bboxes: bool,
ocr_kwargs: Optional[dict] = None,
**text_analyzer_kwargs,
) -> str:
Expand All @@ -916,6 +983,7 @@ def _redact_multiple_dicom_images(
:param overwrite: Only set to True if you are providing
the duplicated DICOM dir in dcm_dir.
:param dst_parent_dir: String path to parent directory of where to store copies.
:param save_bboxes: True if we want to save bounding boxes.
:param ocr_kwargs: Additional params for OCR methods.
:param text_analyzer_kwargs: Additional values for the analyze method
in AnalyzerEngine.
Expand Down Expand Up @@ -945,6 +1013,7 @@ def _redact_multiple_dicom_images(
padding_width,
overwrite,
dst_parent_dir,
save_bboxes,
ocr_kwargs=ocr_kwargs,
**text_analyzer_kwargs,
)
Expand Down
Loading

0 comments on commit e323fed

Please sign in to comment.