Skip to content

Commit

Permalink
Updating verification engines and enable plotting of custom bboxes (#…
Browse files Browse the repository at this point in the history
…1164)

* Adding methods to enable plotting of custom bboxes

* Adding test for get_pii_bboxes

* Linting fixes and addition of test for add_custom_bboxes

* Adding use of custom bbox into DICOM verification engine

* Fixing tests for the DICOM verification engine

* Adding in ocr kwargs compatibility and updating tests

* Adding example notebook

* Linting fixes

* Remaining linting fix

---------

Co-authored-by: Omri Mendels <[email protected]>
  • Loading branch information
niwilso and omri374 authored Sep 28, 2023
1 parent beb605d commit 7400dc4
Show file tree
Hide file tree
Showing 7 changed files with 840 additions and 43 deletions.
440 changes: 440 additions & 0 deletions docs/samples/python/plot_custom_bboxes.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def verify_dicom_instance(
instance: pydicom.dataset.FileDataset,
padding_width: int = 25,
display_image: bool = True,
show_text_annotation: bool = True,
use_metadata: bool = True,
ocr_kwargs: Optional[dict] = None,
ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None,
Expand All @@ -59,6 +60,8 @@ def verify_dicom_instance(
:param instance: Loaded DICOM instance including pixel data and metadata.
:param padding_width: Padding width to use when running OCR.
:param display_image: If the verification image is displayed and returned.
:param show_text_annotation: True to display entity type when displaying
image with bounding boxes.
:param use_metadata: Whether to redact text in the image that
are present in the metadata.
:param ocr_kwargs: Additional params for OCR methods.
Expand Down Expand Up @@ -88,23 +91,47 @@ def verify_dicom_instance(
loaded_image = Image.open(png_filepath)
image = self._add_padding(loaded_image, is_greyscale, padding_width)

# Get OCR results
perform_ocr_kwargs, ocr_threshold = self.image_analyzer_engine._parse_ocr_kwargs(ocr_kwargs) # noqa: E501
ocr_results = self.ocr_engine.perform_ocr(image, **perform_ocr_kwargs)
if ocr_threshold:
ocr_results = self.image_analyzer_engine.threshold_ocr_result(
ocr_results,
ocr_threshold
)
ocr_bboxes = self.bbox_processor.get_bboxes_from_ocr_results(
ocr_results
)

# Get analyzer results
ocr_results = self.ocr_engine.perform_ocr(image)
analyzer_results = self._get_analyzer_results(
image, instance, use_metadata, ocr_kwargs, ad_hoc_recognizers,
**text_analyzer_kwargs
)
analyzer_bboxes = self.bbox_processor.get_bboxes_from_analyzer_results(
analyzer_results
)

# Prepare for plotting
pii_bboxes = self.image_analyzer_engine.get_pii_bboxes(
ocr_bboxes,
analyzer_bboxes
)
if is_greyscale:
use_greyscale_cmap = True
else:
use_greyscale_cmap = False

# Get image with verification boxes
verify_image = (
self.verify(
image, ad_hoc_recognizers=ad_hoc_recognizers, **text_analyzer_kwargs
self.image_analyzer_engine.add_custom_bboxes(
image, pii_bboxes, show_text_annotation, use_greyscale_cmap
)
if display_image
else None
)

return verify_image, ocr_results, analyzer_results
return verify_image, ocr_bboxes, analyzer_bboxes

def eval_dicom_instance(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
from presidio_image_redactor import OCR, TesseractOCR
from presidio_image_redactor.entities import ImageRecognizerResult

from PIL import Image, ImageChops
import matplotlib.pyplot as plt
import matplotlib
import io


class ImageAnalyzerEngine:
"""ImageAnalyzerEngine class.
Expand Down Expand Up @@ -234,3 +239,125 @@ def _check_for_allow_list(text_analyzer_kwargs: dict) -> List[str]:
allow_list = text_analyzer_kwargs["allow_list"]

return allow_list

@staticmethod
def fig2img(fig: matplotlib.figure.Figure) -> Image.Image:
    """Convert a Matplotlib figure to a PIL Image and return it.

    The figure is rendered into an in-memory buffer, so nothing is written
    to disk.

    :param fig: Matplotlib figure.
    :return: Image of figure.
    """
    buf = io.BytesIO()
    # Pin the output format: when omitted, savefig falls back to
    # rcParams["savefig.format"], which a user configuration may set to a
    # vector format (e.g. pdf/svg) that PIL cannot open.
    fig.savefig(buf, format="png")
    # Rewind so PIL reads the rendered bytes from the beginning.
    buf.seek(0)

    return Image.open(buf)

@staticmethod
def get_pii_bboxes(
    ocr_bboxes: List[dict],
    analyzer_bboxes: List[dict]
) -> List[dict]:
    """Get a list of bboxes with is_PII property.

    Each OCR bbox produces exactly one output bbox, in the same order. When
    an analyzer bbox has identical ``left``, ``top``, ``width`` and
    ``height`` values, the output is a copy of that analyzer bbox with
    ``is_PII`` set to True (first match wins). Otherwise the output is a
    copy of the OCR bbox with ``is_PII`` set to False.

    The input dictionaries are not modified; shallow copies are returned.

    :param ocr_bboxes: Bboxes from OCR results.
    :param analyzer_bboxes: Bboxes from analyzer results.
    :return: All bboxes with appropriate label for whether it is PII or not.
    """
    geometry_keys = ("left", "top", "width", "height")

    bboxes = []
    for ocr_bbox in ocr_bboxes:
        # Look for the first analyzer bbox with the same position and size
        matched_bbox = None
        for analyzer_bbox in analyzer_bboxes:
            if all(ocr_bbox[key] == analyzer_bbox[key] for key in geometry_keys):
                matched_bbox = analyzer_bbox
                break

        is_pii = matched_bbox is not None
        source_bbox = matched_bbox if is_pii else ocr_bbox

        # Copy instead of tagging in place so the caller's lists stay
        # untouched and repeated matches do not share one dict object.
        bboxes.append({**source_bbox, "is_PII": is_pii})

    return bboxes

@classmethod
def add_custom_bboxes(
    cls,
    image: Image.Image,
    bboxes: List[dict],
    show_text_annotation: bool = True,
    use_greyscale_cmap: bool = False
) -> Image.Image:
    """Add custom bounding boxes to image.

    Boxes whose ``is_PII`` value is truthy are outlined in red; all other
    boxes (including those without an ``is_PII`` key) are outlined in blue.

    :param image: Standard image of DICOM pixels.
    :param bboxes: List of bounding boxes to display (with is_PII field).
        Each box must provide ``left``, ``top``, ``width`` and ``height``;
        ``entity_type`` is optional and defaults to "UNKNOWN".
    :param show_text_annotation: True if you want text annotation for
        PHI status to display.
    :param use_greyscale_cmap: Use greyscale color map.
    :return: Image with bounding boxes drawn on. When ``bboxes`` is empty,
        an unmodified copy of ``image``.
    """
    image_custom = ImageChops.duplicate(image)

    # Nothing to draw: return the copy without creating a figure at all.
    if not bboxes:
        return image_custom

    image_x, image_y = image_custom.size

    fig, ax = plt.subplots()
    try:
        # Divisor converting the image's pixel dimensions to figure inches.
        image_r = 70
        fig.set_size_inches(image_x / image_r, image_y / image_r)

        for box in bboxes:
            entity_type = box.get("entity_type", "UNKNOWN")
            bbox_color = "r" if box.get("is_PII", False) else "b"

            # Get coordinates and dimensions
            x0 = box["left"]
            y0 = box["top"]
            rect = matplotlib.patches.Rectangle(
                (x0, y0),
                box["width"], box["height"],
                edgecolor=bbox_color,
                facecolor="none"
            )
            ax.add_patch(rect)
            if show_text_annotation:
                ax.annotate(
                    entity_type,
                    xy=(x0 - 3, y0 - 3),
                    xycoords="data",
                    bbox=dict(boxstyle="round4,pad=.5", fc="0.9"),
                )

        if use_greyscale_cmap:
            ax.imshow(image_custom, cmap="gray")
        else:
            ax.imshow(image_custom)

        # Render the figure and scale it back to the source image size.
        im_from_fig = cls.fig2img(fig)
        im_resized = im_from_fig.resize((image_x, image_y))
    finally:
        # Figures created through pyplot stay registered until closed;
        # release this one so repeated calls do not accumulate figures.
        plt.close(fig)

    return im_resized
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from PIL import Image, ImageChops
from presidio_image_redactor.image_redactor_engine import ImageRedactorEngine
from presidio_analyzer import PatternRecognizer
import matplotlib
import io
from matplotlib import pyplot as plt
from typing import Optional, List


Expand All @@ -23,6 +21,9 @@ class ImagePiiVerifyEngine(ImageRedactorEngine):
def verify(
self,
image: Image,
is_greyscale: bool = False,
display_image: bool = True,
show_text_annotation: bool = True,
ocr_kwargs: Optional[dict] = None,
ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None,
**text_analyzer_kwargs
Expand All @@ -33,6 +34,10 @@ def verify(
new instance and manipulate it.
:param image: PIL Image to be processed.
:param is_greyscale: Whether the image is greyscale or not.
:param display_image: If the verification image is displayed and returned.
:param show_text_annotation: True to display entity type when displaying
image with bounding boxes.
:param ocr_kwargs: Additional params for OCR methods.
:param ad_hoc_recognizers: List of PatternRecognizer objects to use
for ad-hoc recognizer.
Expand All @@ -41,49 +46,59 @@ def verify(
:return: the annotated image
"""

image = ImageChops.duplicate(image)
image_x, image_y = image.size

# Detect PII
# Check the ad-hoc recognizers list
self._check_ad_hoc_recognizer_list(ad_hoc_recognizers)

# Detect text
perform_ocr_kwargs, ocr_threshold = self.image_analyzer_engine._parse_ocr_kwargs(ocr_kwargs) # noqa: E501
ocr_results = self.image_analyzer_engine.ocr.perform_ocr(
image,
**perform_ocr_kwargs
)
if ocr_threshold:
ocr_results = self.image_analyzer_engine.threshold_ocr_result(
ocr_results,
ocr_threshold
)
ocr_bboxes = self.bbox_processor.get_bboxes_from_ocr_results(ocr_results)

# Detect PII
if ad_hoc_recognizers is None:
bboxes = self.image_analyzer_engine.analyze(
analyzer_results = self.image_analyzer_engine.analyze(
image,
ocr_kwargs=ocr_kwargs,
**text_analyzer_kwargs,
)
else:
bboxes = self.image_analyzer_engine.analyze(
analyzer_results = self.image_analyzer_engine.analyze(
image,
ocr_kwargs=ocr_kwargs,
ad_hoc_recognizers=ad_hoc_recognizers,
**text_analyzer_kwargs,
)
analyzer_bboxes = self.bbox_processor.get_bboxes_from_analyzer_results(
analyzer_results
)

fig, ax = plt.subplots()
image_r = 70
fig.set_size_inches(image_x / image_r, image_y / image_r)
if len(bboxes) == 0:
return image
# Prepare for plotting
pii_bboxes = self.image_analyzer_engine.get_pii_bboxes(
ocr_bboxes,
analyzer_bboxes
)
if is_greyscale:
use_greyscale_cmap = True
else:
for box in bboxes:
entity_type = box.entity_type
x0 = box.left
y0 = box.top
x1 = x0 + box.width
y1 = y0 + box.height
rect = matplotlib.patches.Rectangle(
(x0, y0), x1 - x0, y1 - y0, edgecolor="b", facecolor="none"
)
ax.add_patch(rect)
ax.annotate(
entity_type,
xy=(x0 - 3, y0 - 3),
xycoords="data",
bbox=dict(boxstyle="round4,pad=.5", fc="0.9"),
)
ax.imshow(image)
im_from_fig = fig2img(fig)
im_resized = im_from_fig.resize((image_x, image_y))
return im_resized
use_greyscale_cmap = False

# Get image with verification boxes
verify_image = (
self.image_analyzer_engine.add_custom_bboxes(
image, pii_bboxes, show_text_annotation, use_greyscale_cmap
)
if display_image
else None
)

return verify_image
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,12 @@ def test_verify_correctly(
expected_ocr_results_labels.append(item["label"])

# Act
test_image_verify, test_ocr_results, _ = DicomImagePiiVerifyEngine().verify_dicom_instance(
test_image_verify, test_ocr_results_formatted, _ = DicomImagePiiVerifyEngine().verify_dicom_instance(
instance=get_mock_dicom_instance,
padding_width=PADDING_WIDTH,
display_image=True,
ocr_kwargs=None
)
test_ocr_results_formatted = BboxProcessor().get_bboxes_from_ocr_results(
ocr_results=test_ocr_results
)

# Check most OCR results (labels) are the same
# Don't worry about position since that is implied in analyzer results
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,26 @@ def test_verify_dicom_instance_happy_path(
mock_add_padding = mocker.patch.object(
DicomImagePiiVerifyEngine, "_add_padding", return_value=None
)
mock_parse_ocr_kwargs = mocker.patch.object(
ImageAnalyzerEngine, "_parse_ocr_kwargs", return_value=[{}, None]
)
mock_perform_ocr = mocker.patch.object(
TesseractOCR, "perform_ocr", return_value=None
)
mock_format_ocr_results = mocker.patch.object(
BboxProcessor, "get_bboxes_from_ocr_results", return_value=None
)
mock_analyze = mocker.patch.object(
DicomImagePiiVerifyEngine, "_get_analyzer_results", return_value=None
)
mock_verify = mocker.patch.object(
DicomImagePiiVerifyEngine, "verify", return_value=None
mock_format_analyzer_results = mocker.patch.object(
BboxProcessor, "get_bboxes_from_analyzer_results", return_value=None
)
mock_get_pii = mocker.patch.object(
ImageAnalyzerEngine, "get_pii_bboxes", return_value=None
)
mock_add_bboxes = mocker.patch.object(
ImageAnalyzerEngine, "add_custom_bboxes", return_value=None
)

# Act
Expand All @@ -105,9 +117,13 @@ def test_verify_dicom_instance_happy_path(
assert mock_save_pixel_array.call_count == 1
assert mock_image_open.call_count == 1
assert mock_add_padding.call_count == 1
assert mock_parse_ocr_kwargs.call_count == 1
assert mock_perform_ocr.call_count == 1
assert mock_format_ocr_results.call_count == 1
assert mock_analyze.call_count == 1
assert mock_verify.call_count == 1
assert mock_format_analyzer_results.call_count == 1
assert mock_get_pii.call_count == 1
assert mock_add_bboxes.call_count == 1

def test_verify_dicom_instance_exception(
mock_engine: DicomImagePiiVerifyEngine,
Expand Down
Loading

0 comments on commit 7400dc4

Please sign in to comment.