Skip to content

Commit

Permalink
chg ! findings response
Browse files Browse the repository at this point in the history
  • Loading branch information
vitali-yanushchyk-valor committed Jan 16, 2025
1 parent 549d86b commit e573cef
Show file tree
Hide file tree
Showing 12 changed files with 148 additions and 65 deletions.
11 changes: 8 additions & 3 deletions src/hope_dedup_engine/apps/api/admin/finding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from adminfilters.filters import DjangoLookupFilter, NumberFilter
from adminfilters.mixin import AdminFiltersMixin

from hope_dedup_engine.apps.api.models import Finding
from hope_dedup_engine.apps.api.models import Finding, Image


@register(Finding)
Expand All @@ -15,12 +15,17 @@ class FindingAdmin(AdminFiltersMixin, ModelAdmin):
"score",
"first_reference_pk",
"second_reference_pk",
"error",
"formatted_status_code",
)

def formatted_status_code(self, obj):
    """Render a finding's status code as ``"<code> <NAME>"`` for the change list.

    Args:
        obj: The ``Finding`` row being displayed.

    Returns:
        The numeric code followed by the matching ``Image.StatusCode`` member
        name, or ``"<code> UNKNOWN"`` when the stored value is not a member.
    """
    code = obj.status_code
    try:
        name = Image.StatusCode(code).name
    except ValueError:
        # A NULL or unrecognised code must not take down the whole admin
        # change list; show the raw value instead.
        name = "UNKNOWN"
    return f"{code} {name}"


formatted_status_code.short_description = "Status Code"

list_filter = (
("deduplication_set", AutoCompleteFilter),
("score", NumberFilter),
("error", NumberFilter),
DjangoLookupFilter,
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Generated by Django 5.1.4 on 2025-01-16 13:06

import django.core.validators
from django.db import migrations, models


# Schema migration for the Finding model: removes the `error` field, adds
# `first_filename`, `second_filename` and `status_code`, and alters `score`
# and `second_reference_pk`.
class Migration(migrations.Migration):

dependencies = [
("api", "0015_deduplicationset_encodings_finding_delete_duplicate"),
]

operations = [
# NOTE(review): `error` is dropped without a data migration; existing values
# are not copied into `status_code` — confirm nothing needs to be preserved.
migrations.RemoveField(
model_name="finding",
name="error",
),
# Filenames of both sides of the pair; existing rows get "".
migrations.AddField(
model_name="finding",
name="first_filename",
field=models.CharField(default="", max_length=255),
),
migrations.AddField(
model_name="finding",
name="second_filename",
field=models.CharField(default="", max_length=255),
),
# Existing rows default to 200 ("deduplication success").
migrations.AddField(
model_name="finding",
name="status_code",
field=models.IntegerField(
choices=[
(200, "deduplication success"),
(404, "no file found"),
(412, "no face detected"),
(429, "multiple faces detected"),
(500, "generic error"),
],
default=200,
),
),
# Adds 0..1 range validators to the similarity score.
migrations.AlterField(
model_name="finding",
name="score",
field=models.FloatField(
default=0,
validators=[
django.core.validators.MinValueValidator(0),
django.core.validators.MaxValueValidator(1),
],
verbose_name="Similarity Score",
),
),
# The second reference now defaults to "".
migrations.AlterField(
model_name="finding",
name="second_reference_pk",
field=models.CharField(
default="", max_length=100, verbose_name="Second reference"
),
),
]
58 changes: 37 additions & 21 deletions src/hope_dedup_engine/apps/api/models/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from uuid import uuid4

from django.conf import settings
from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models

from hope_dedup_engine.apps.security.models import ExternalSystem
Expand Down Expand Up @@ -81,26 +82,39 @@ def update_encodings(self, encodings: EncodingType) -> None:
self.save()

def update_findings(self, findings: FindingType) -> None:
    """Persist deduplication results as ``Finding`` rows of this set.

    Each record is ``(first_filename, second_filename, score, status_code)``.
    Filenames are translated to reference pks through the ``Image`` rows of
    this deduplication set.

    Args:
        findings: Records to store; ``None`` entries (allowed by
            ``FindingType``) are skipped.
    """
    images = Image.objects.filter(deduplication_set=self).values(
        "filename", "reference_pk"
    )
    filename_to_reference_pk = {
        img["filename"]: img["reference_pk"] for img in images
    }
    # Error records carry an empty second filename; it maps to an empty pk.
    filename_to_reference_pk[""] = ""

    findings_to_create = [
        Finding(
            deduplication_set=self,
            first_filename=first_filename,
            # Fall back to "" rather than None: the reference pk columns are
            # non-nullable, and a filename may have no Image row.
            first_reference_pk=filename_to_reference_pk.get(first_filename, ""),
            second_filename=second_filename,
            second_reference_pk=filename_to_reference_pk.get(second_filename, ""),
            score=score,
            status_code=status_code,
        )
        for first_filename, second_filename, score, status_code in (
            record for record in findings if record is not None
        )
    ]
    # ignore_conflicts keeps reruns from failing on already stored pairs.
    Finding.objects.bulk_create(findings_to_create, ignore_conflicts=True)


class Image(models.Model):
"""
# TODO: rename to Entity/Entry
"""

# Outcome codes for processing an image or image pair; also used as the
# choices of Finding.status_code.
# NOTE(review): the numbers coincide with HTTP status codes (200/404/412/429/500)
# but are stored as plain integers — presumably chosen for familiarity; confirm.
class StatusCode(models.IntegerChoices):
DEDUPLICATE_SUCCESS = 200, "deduplication success"
NO_FILE_FOUND = 404, "no file found"
NO_FACE_DETECTED = 412, "no face detected"
MULTIPLE_FACES_DETECTED = 429, "multiple faces detected"
GENERIC_ERROR = 500, "generic error"

id = models.UUIDField(primary_key=True, default=uuid4)
deduplication_set = models.ForeignKey(DeduplicationSet, on_delete=models.CASCADE)
reference_pk = models.CharField(max_length=REFERENCE_PK_LENGTH)
Expand All @@ -120,21 +134,23 @@ class Finding(models.Model):
Couple of finding entities
"""

# class ErrorCode(models.IntegerChoices):
# GENERIC_ERROR = 999
# NO_FACE_DETECTED = 998
# MULTIPLE_FACES_DETECTED = 997
# NO_FILE_FOUND = 996

deduplication_set = models.ForeignKey(DeduplicationSet, on_delete=models.CASCADE)
first_reference_pk = models.CharField(
max_length=REFERENCE_PK_LENGTH, verbose_name="First reference"
)
first_filename = models.CharField(default="", max_length=255)
second_reference_pk = models.CharField(
max_length=REFERENCE_PK_LENGTH, verbose_name="Second reference"
default="", max_length=REFERENCE_PK_LENGTH, verbose_name="Second reference"
)
second_filename = models.CharField(default="", max_length=255)
score = models.FloatField(
default=0,
validators=[MinValueValidator(0), MaxValueValidator(1)],
verbose_name="Similarity Score",
)
status_code = models.IntegerField(
choices=Image.StatusCode.choices, default=Image.StatusCode.DEDUPLICATE_SUCCESS
)
score = models.FloatField(default=0, validators=[], verbose_name="Similarity Score")
error = models.IntegerField(null=True, blank=True)

class Meta:
unique_together = (
Expand Down
6 changes: 5 additions & 1 deletion src/hope_dedup_engine/apps/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class Meta:

class EntrySerializer(serializers.Serializer):
reference_pk = serializers.SerializerMethodField()
filename = serializers.SerializerMethodField()

def __init__(self, prefix: str, *args: Any, **kwargs: Any) -> None:
self._prefix = prefix
Expand All @@ -79,14 +80,17 @@ def __init__(self, prefix: str, *args: Any, **kwargs: Any) -> None:
def get_reference_pk(self, duplicate: Finding) -> str:
    """Return the reference pk of the side of the pair selected by the prefix.

    Args:
        duplicate: The ``Finding`` being serialized.

    Returns:
        The value of ``<prefix>_reference_pk``; these are ``CharField``
        columns, so the value is a string rather than an integer.
    """
    return getattr(duplicate, f"{self._prefix}_reference_pk")

def get_filename(self, duplicate: Finding) -> str:
    """Return the filename of the side of the pair selected by the prefix."""
    attribute_name = f"{self._prefix}_filename"
    return getattr(duplicate, attribute_name)


class DuplicateSerializer(serializers.ModelSerializer):
    """Serialize a ``Finding`` as two nested entries plus score and status code."""

    # source="*" passes the whole Finding to each nested serializer, which then
    # reads the attributes carrying its own prefix.
    first = EntrySerializer(prefix="first", source="*")
    second = EntrySerializer(prefix="second", source="*")

    class Meta:
        model = Finding
        # `error` is no longer exposed; `status_code` takes its place.
        fields = ("first", "second", "score", "status_code")


CREATE_PAIR_FIELDS = "first", "second"
Expand Down
2 changes: 1 addition & 1 deletion src/hope_dedup_engine/apps/faces/celery_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def callback_findings(
record
for d in results
for record in d
if not (pair := tuple(sorted(record[:2]))) in seen_pairs
if (pair := tuple(sorted(record[:2]))) not in seen_pairs
and not seen_pairs.add(pair)
]
ds.update_findings(findings)
Expand Down
23 changes: 14 additions & 9 deletions src/hope_dedup_engine/apps/faces/services/facial.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from deepface import DeepFace

from hope_dedup_engine.apps.api.models import Image
from hope_dedup_engine.apps.faces.managers import ImagesStorageManager
from hope_dedup_engine.constants import FacialError, is_facial_error
from hope_dedup_engine.apps.faces.utils import is_facial_error
from hope_dedup_engine.types import EncodingType, FindingType, IgnoredPairType

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -36,23 +37,23 @@ def encode_faces(
for file in files:
progress()
if file not in images:
encoded[file] = FacialError.NO_FILE_FOUND.name
encoded[file] = Image.StatusCode.NO_FILE_FOUND.name
continue
if file in encoded:
existing_cnt += 1
continue
try:
result = DeepFace.represent(storage.load_image(file), **(options or {}))
if len(result) > 1:
encoded[file] = FacialError.MULTIPLE_FACES_DETECTED.name
encoded[file] = Image.StatusCode.MULTIPLE_FACES_DETECTED.name
else:
encoded[file] = result[0]["embedding"]
added_cnt += 1
except TypeError as e:
logger.exception(e)
encoded[file] = FacialError.GENERIC_ERROR.name
encoded[file] = Image.StatusCode.GENERIC_ERROR.name
except ValueError:
encoded[file] = FacialError.NO_FACE_DETECTED.name
encoded[file] = Image.StatusCode.NO_FACE_DETECTED.name
return encoded, added_cnt, existing_cnt


Expand All @@ -75,7 +76,7 @@ def dedupe_images( # noqa 901
progress()
enc1 = encodings[file1]
if is_facial_error(enc1):
findings[file1].append([enc1, FacialError[enc1].code])
findings[file1].append([enc1, None])
continue
for file2, enc2 in encodings.items():
if (
Expand All @@ -93,10 +94,14 @@ def dedupe_images( # noqa 901
findings[file1].append([file2, similarity])

results: FindingType = []

for img, duplicates in findings.items():
for dup in duplicates:
if is_facial_error(dup[1]):
results.append((img, dup[0], 0, dup[1]))
if is_facial_error(dup[0]):
results.append((img, "", 0, Image.StatusCode[dup[0]].value))
else:
results.append((img, dup[0], dup[1], None))
results.append(
(img, dup[0], dup[1], Image.StatusCode.DEDUPLICATE_SUCCESS.value)
)

return results
15 changes: 15 additions & 0 deletions src/hope_dedup_engine/apps/faces/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from hope_dedup_engine.apps.api.models import Image


def is_facial_error(value):
    """Tell whether ``value`` denotes a non-success ``Image.StatusCode``.

    ``value`` may be a member's numeric value, its name or its label.
    Anything that is not an ``int`` or ``str``, the success member in any of
    its three spellings, and unknown values all yield ``False``.
    """
    if not isinstance(value, (int, str)):
        return False

    status = Image.StatusCode
    success = status.DEDUPLICATE_SUCCESS
    if value in (success, success.name, success.label):
        return False

    known_spellings = [
        *status.values,
        *status.names,
        *(member.label for member in status),
    ]
    return value in known_spellings
Empty file.
20 changes: 0 additions & 20 deletions src/hope_dedup_engine/constants.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/hope_dedup_engine/types.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
EncodingType = dict[str, str | list[float]]
FindingRecord = tuple[str, str, float]
FindingRecord = tuple[str, str, float, int]
FindingType = list[FindingRecord | None]
IgnoredPairType = list[str, str]
1 change: 1 addition & 0 deletions tests/admin/test_admin_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def extend(self, __iterable) -> None:

GLOBAL_EXCLUDED_MODELS = RegexList(
[
r"api\.Finding",
r"django_celery_beat\.ClockedSchedule",
r"contenttypes\.ContentType",
r"faces\.DummyModel",
Expand Down
14 changes: 5 additions & 9 deletions tests/extras/testutils/factories/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
IgnoredReferencePkPair,
Image,
)
from hope_dedup_engine.constants import FacialError


class TokenFactory(DjangoModelFactory):
Expand Down Expand Up @@ -67,22 +66,19 @@ class Meta:

deduplication_set = SubFactory(DeduplicationSetFactory)
first_reference_pk = fuzzy.FuzzyText()
first_filename = fuzzy.FuzzyText()
second_reference_pk = fuzzy.FuzzyText()
second_filename = fuzzy.FuzzyText()
score = fuzzy.FuzzyFloat(low=0, high=1)

@lazy_attribute
def status_code(self):
    """Pick a status code consistent with the generated score.

    A zero score gets a random ``Image.StatusCode`` value; any other score
    is a successful comparison. ``None`` is never returned because
    ``Finding.status_code`` is a non-nullable integer field.
    """
    if self.score == 0:
        # `values` already holds plain ints, so the fuzzed choice is the code.
        return fuzzy.FuzzyChoice(Image.StatusCode.values).fuzz()
    return Image.StatusCode.DEDUPLICATE_SUCCESS.value

@lazy_attribute
def second_reference_pk(self):
if self.error is not None:
return FacialError(self.error).name
return fuzzy.FuzzyText()


class IgnoredFilenamePairFactory(DjangoModelFactory):
deduplication_set = SubFactory(DeduplicationSetFactory)
Expand Down

0 comments on commit e573cef

Please sign in to comment.