From 4f1dfce7c36ee719b4bddbab8b96fb1e8ecd93b2 Mon Sep 17 00:00:00 2001 From: Vitali Yanushchyk Date: Thu, 16 Jan 2025 10:38:06 -0300 Subject: [PATCH] chg ! findings response --- .../apps/api/admin/finding.py | 11 +++- ...g_error_finding_first_filename_and_more.py | 61 +++++++++++++++++++ .../apps/api/models/deduplication.py | 58 +++++++++++------- src/hope_dedup_engine/apps/api/serializers.py | 6 +- .../apps/faces/celery_tasks.py | 2 +- .../apps/faces/services/facial.py | 23 ++++--- src/hope_dedup_engine/apps/faces/utils.py | 15 +++++ .../apps/faces/utils/__init__.py | 0 src/hope_dedup_engine/constants.py | 20 ------ src/hope_dedup_engine/types.py | 2 +- 10 files changed, 142 insertions(+), 56 deletions(-) create mode 100644 src/hope_dedup_engine/apps/api/migrations/0016_remove_finding_error_finding_first_filename_and_more.py create mode 100644 src/hope_dedup_engine/apps/faces/utils.py delete mode 100644 src/hope_dedup_engine/apps/faces/utils/__init__.py delete mode 100644 src/hope_dedup_engine/constants.py diff --git a/src/hope_dedup_engine/apps/api/admin/finding.py b/src/hope_dedup_engine/apps/api/admin/finding.py index dd1c1e72..d6d528bf 100644 --- a/src/hope_dedup_engine/apps/api/admin/finding.py +++ b/src/hope_dedup_engine/apps/api/admin/finding.py @@ -4,7 +4,7 @@ from adminfilters.filters import DjangoLookupFilter, NumberFilter from adminfilters.mixin import AdminFiltersMixin -from hope_dedup_engine.apps.api.models import Finding +from hope_dedup_engine.apps.api.models import Finding, Image @register(Finding) @@ -15,12 +15,17 @@ class FindingAdmin(AdminFiltersMixin, ModelAdmin): "score", "first_reference_pk", "second_reference_pk", - "error", + "formatted_status_code", ) + + def formatted_status_code(self, obj): + return f"{obj.status_code} {Image.StatusCode(obj.status_code).name}" + + formatted_status_code.short_description = "Status Code" + list_filter = ( ("deduplication_set", AutoCompleteFilter), ("score", NumberFilter), - ("error", NumberFilter), DjangoLookupFilter, ) diff --git a/src/hope_dedup_engine/apps/api/migrations/0016_remove_finding_error_finding_first_filename_and_more.py b/src/hope_dedup_engine/apps/api/migrations/0016_remove_finding_error_finding_first_filename_and_more.py new file mode 100644 index 00000000..73f2db59 --- /dev/null +++ b/src/hope_dedup_engine/apps/api/migrations/0016_remove_finding_error_finding_first_filename_and_more.py @@ -0,0 +1,61 @@ +# Generated by Django 5.1.4 on 2025-01-16 13:06 + +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("api", "0015_deduplicationset_encodings_finding_delete_duplicate"), + ] + + operations = [ + migrations.RemoveField( + model_name="finding", + name="error", + ), + migrations.AddField( + model_name="finding", + name="first_filename", + field=models.CharField(default="", max_length=255), + ), + migrations.AddField( + model_name="finding", + name="second_filename", + field=models.CharField(default="", max_length=255), + ), + migrations.AddField( + model_name="finding", + name="status_code", + field=models.IntegerField( + choices=[ + (200, "deduplication success"), + (404, "no file found"), + (412, "no face detected"), + (429, "multiple faces detected"), + (500, "generic error"), + ], + default=200, + ), + ), + migrations.AlterField( + model_name="finding", + name="score", + field=models.FloatField( + default=0, + validators=[ + django.core.validators.MinValueValidator(0), + django.core.validators.MaxValueValidator(1), + ], + verbose_name="Similarity Score", + ), + ), + migrations.AlterField( + model_name="finding", + name="second_reference_pk", + field=models.CharField( + default="", max_length=100, verbose_name="Second reference" + ), + ), + ] diff --git a/src/hope_dedup_engine/apps/api/models/deduplication.py b/src/hope_dedup_engine/apps/api/models/deduplication.py index 31649854..6747e82b 100644 --- a/src/hope_dedup_engine/apps/api/models/deduplication.py +++ b/src/hope_dedup_engine/apps/api/models/deduplication.py @@ -2,6 +2,7 @@ from uuid import uuid4 from django.conf import settings +from django.core.validators import MaxValueValidator, MinValueValidator from django.db import models from hope_dedup_engine.apps.security.models import ExternalSystem @@ -81,19 +82,25 @@ def update_encodings(self, encodings: EncodingType) -> None: self.save() def update_findings(self, findings: FindingType) -> None: - Finding.objects.bulk_create( - [ - Finding( - deduplication_set=self, - first_reference_pk=f[0], - second_reference_pk=f[1], - score=f[2], - error=f[3], - ) - for f in findings - ], - ignore_conflicts=True, + images = Image.objects.filter(deduplication_set=self).values( + "filename", "reference_pk" ) + filename_to_reference_pk = { + img["filename"]: img["reference_pk"] for img in images + } | {"": ""} + findings_to_create = [ + Finding( + deduplication_set=self, + first_filename=f[0], + first_reference_pk=filename_to_reference_pk.get(f[0]), + second_filename=f[1], + second_reference_pk=filename_to_reference_pk.get(f[1]), + score=f[2], + status_code=f[3], + ) + for f in findings + ] + Finding.objects.bulk_create(findings_to_create, ignore_conflicts=True) class Image(models.Model): @@ -101,6 +108,13 @@ class Image(models.Model): # TODO: rename to Entity/Entry """ + class StatusCode(models.IntegerChoices): + DEDUPLICATE_SUCCESS = 200, "deduplication success" + NO_FILE_FOUND = 404, "no file found" + NO_FACE_DETECTED = 412, "no face detected" + MULTIPLE_FACES_DETECTED = 429, "multiple faces detected" + GENERIC_ERROR = 500, "generic error" + id = models.UUIDField(primary_key=True, default=uuid4) deduplication_set = models.ForeignKey(DeduplicationSet, on_delete=models.CASCADE) reference_pk = models.CharField(max_length=REFERENCE_PK_LENGTH) @@ -120,21 +134,23 @@ class Finding(models.Model): Couple of finding entities """ - # class ErrorCode(models.IntegerChoices): - # GENERIC_ERROR = 999 - # NO_FACE_DETECTED = 998 - # MULTIPLE_FACES_DETECTED = 997 - # NO_FILE_FOUND = 996 - deduplication_set = models.ForeignKey(DeduplicationSet, on_delete=models.CASCADE) first_reference_pk = models.CharField( max_length=REFERENCE_PK_LENGTH, verbose_name="First reference" ) + first_filename = models.CharField(default="", max_length=255) second_reference_pk = models.CharField( - max_length=REFERENCE_PK_LENGTH, verbose_name="Second reference" + default="", max_length=REFERENCE_PK_LENGTH, verbose_name="Second reference" + ) + second_filename = models.CharField(default="", max_length=255) + score = models.FloatField( + default=0, + validators=[MinValueValidator(0), MaxValueValidator(1)], + verbose_name="Similarity Score", + ) + status_code = models.IntegerField( + choices=Image.StatusCode.choices, default=Image.StatusCode.DEDUPLICATE_SUCCESS ) - score = models.FloatField(default=0, validators=[], verbose_name="Similarity Score") - error = models.IntegerField(null=True, blank=True) class Meta: unique_together = ( diff --git a/src/hope_dedup_engine/apps/api/serializers.py b/src/hope_dedup_engine/apps/api/serializers.py index 04d1cce8..7e695b87 100644 --- a/src/hope_dedup_engine/apps/api/serializers.py +++ b/src/hope_dedup_engine/apps/api/serializers.py @@ -71,6 +71,7 @@ class Meta: class EntrySerializer(serializers.Serializer): reference_pk = serializers.SerializerMethodField() + filename = serializers.SerializerMethodField() def __init__(self, prefix: str, *args: Any, **kwargs: Any) -> None: self._prefix = prefix @@ -79,6 +80,9 @@ def __init__(self, prefix: str, *args: Any, **kwargs: Any) -> None: def get_reference_pk(self, duplicate: Finding) -> int: return getattr(duplicate, f"{self._prefix}_reference_pk") + def get_filename(self, duplicate: Finding) -> str: + return getattr(duplicate, f"{self._prefix}_filename") + class DuplicateSerializer(serializers.ModelSerializer): first = EntrySerializer(prefix="first", source="*") @@ -86,7 +90,7 @@ class DuplicateSerializer(serializers.ModelSerializer): class Meta: model = Finding - fields = "first", "second", "score", "error" + fields = "first", "second", "score", "status_code" CREATE_PAIR_FIELDS = "first", "second" diff --git a/src/hope_dedup_engine/apps/faces/celery_tasks.py b/src/hope_dedup_engine/apps/faces/celery_tasks.py index 933af02e..00bd815b 100644 --- a/src/hope_dedup_engine/apps/faces/celery_tasks.py +++ b/src/hope_dedup_engine/apps/faces/celery_tasks.py @@ -120,7 +120,7 @@ def callback_findings( record for d in results for record in d - if not (pair := tuple(sorted(record[:2]))) in seen_pairs + if (pair := tuple(sorted(record[:2]))) not in seen_pairs and not seen_pairs.add(pair) ] ds.update_findings(findings) diff --git a/src/hope_dedup_engine/apps/faces/services/facial.py b/src/hope_dedup_engine/apps/faces/services/facial.py index 1768bdb5..02731be0 100644 --- a/src/hope_dedup_engine/apps/faces/services/facial.py +++ b/src/hope_dedup_engine/apps/faces/services/facial.py @@ -4,8 +4,9 @@ from deepface import DeepFace +from hope_dedup_engine.apps.api.models import Image from hope_dedup_engine.apps.faces.managers import ImagesStorageManager -from hope_dedup_engine.constants import FacialError, is_facial_error +from hope_dedup_engine.apps.faces.utils import is_facial_error from hope_dedup_engine.types import EncodingType, FindingType, IgnoredPairType logger = logging.getLogger(__name__) @@ -36,7 +37,7 @@ def encode_faces( for file in files: progress() if file not in images: - encoded[file] = FacialError.NO_FILE_FOUND.name + encoded[file] = Image.StatusCode.NO_FILE_FOUND.name continue if file in encoded: existing_cnt += 1 @@ -44,15 +45,15 @@ def encode_faces( try: result = DeepFace.represent(storage.load_image(file), **(options or {})) if len(result) > 1: - encoded[file] = FacialError.MULTIPLE_FACES_DETECTED.name + encoded[file] = Image.StatusCode.MULTIPLE_FACES_DETECTED.name else: encoded[file] = result[0]["embedding"] added_cnt += 1 except TypeError as e: logger.exception(e) - encoded[file] = FacialError.GENERIC_ERROR.name + encoded[file] = Image.StatusCode.GENERIC_ERROR.name except ValueError: - encoded[file] = FacialError.NO_FACE_DETECTED.name + encoded[file] = Image.StatusCode.NO_FACE_DETECTED.name return encoded, added_cnt, existing_cnt @@ -75,7 +76,7 @@ def dedupe_images( # noqa 901 progress() enc1 = encodings[file1] if is_facial_error(enc1): - findings[file1].append([enc1, FacialError[enc1].code]) + findings[file1].append([enc1, None]) continue for file2, enc2 in encodings.items(): if ( @@ -93,10 +94,14 @@ def dedupe_images( # noqa 901 findings[file1].append([file2, similarity]) results: FindingType = [] + for img, duplicates in findings.items(): for dup in duplicates: - if is_facial_error(dup[1]): - results.append((img, dup[0], 0, dup[1])) + if is_facial_error(dup[0]): + results.append((img, "", 0, Image.StatusCode[dup[0]].value)) else: - results.append((img, dup[0], dup[1], None)) + results.append( + (img, dup[0], dup[1], Image.StatusCode.DEDUPLICATE_SUCCESS.value) + ) + return results diff --git a/src/hope_dedup_engine/apps/faces/utils.py b/src/hope_dedup_engine/apps/faces/utils.py new file mode 100644 index 00000000..74b6ecd0 --- /dev/null +++ b/src/hope_dedup_engine/apps/faces/utils.py @@ -0,0 +1,15 @@ +from hope_dedup_engine.apps.api.models import Image + + +def is_facial_error(value): + if isinstance(value, (int, str)): + return value not in { + Image.StatusCode.DEDUPLICATE_SUCCESS, + Image.StatusCode.DEDUPLICATE_SUCCESS.name, + Image.StatusCode.DEDUPLICATE_SUCCESS.label, + } and value in ( + Image.StatusCode.values + + Image.StatusCode.names + + [choice.label for choice in Image.StatusCode] + ) + return False diff --git a/src/hope_dedup_engine/apps/faces/utils/__init__.py b/src/hope_dedup_engine/apps/faces/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/hope_dedup_engine/constants.py b/src/hope_dedup_engine/constants.py deleted file mode 100644 index 1c810f9a..00000000 --- a/src/hope_dedup_engine/constants.py +++ /dev/null @@ -1,20 +0,0 @@ -from enum import Enum - - -class FacialError(Enum): - GENERIC_ERROR = 999 - NO_FACE_DETECTED = 998 - MULTIPLE_FACES_DETECTED = 997 - NO_FILE_FOUND = 996 - - @property - def code(self) -> int: - return self.value - - -def is_facial_error(value): - if isinstance(value, str): - return value in FacialError.__members__ - if isinstance(value, int): - return value in FacialError._value2member_map_ - return False diff --git a/src/hope_dedup_engine/types.py b/src/hope_dedup_engine/types.py index 88ac339b..fca30a40 100644 --- a/src/hope_dedup_engine/types.py +++ b/src/hope_dedup_engine/types.py @@ -1,4 +1,4 @@ EncodingType = dict[str, str | list[float]] -FindingRecord = tuple[str, str, float] +FindingRecord = tuple[str, str, float, int] FindingType = list[FindingRecord | None] IgnoredPairType = list[str, str]