Skip to content

Commit

Permalink
chg ! rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
vitali-yanushchyk-valor committed Jan 27, 2025
1 parent f2043c7 commit 3bebe7b
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 230 deletions.
48 changes: 0 additions & 48 deletions src/hope_dedup_engine/apps/api/deduplication/adapters.py

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 5.1.5 on 2025-01-23 10:16
# Generated by Django 5.1.5 on 2025-01-27 08:34

import hope_dedup_engine.apps.api.validators
from django.db import migrations, models
Expand All @@ -11,6 +11,11 @@ class Migration(migrations.Migration):
]

operations = [
migrations.AddField(
model_name="deduplicationset",
name="encoding_errors",
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name="config",
name="settings",
Expand Down
63 changes: 23 additions & 40 deletions src/hope_dedup_engine/apps/api/models/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from uuid import uuid4

from django.conf import settings
from django.db import models, transaction
from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models, transaction

from hope_dedup_engine.apps.security.models import ExternalSystem
from hope_dedup_engine.types import (
Expand Down Expand Up @@ -103,6 +103,7 @@ def get_ignored_pairs(self) -> set[IgnoredPair]:
)
)

@transaction.atomic
def update_encodings(self, encodings: list[ImageEmbedding]) -> None:
with transaction.atomic():
fresh_self: DeduplicationSet = (
Expand All @@ -111,6 +112,7 @@ def update_encodings(self, encodings: list[ImageEmbedding]) -> None:
fresh_self.encodings.update(encodings)
fresh_self.save()

@transaction.atomic
def update_encoding_errors(self, errors: list[ImageEmbeddingError]) -> None:
with transaction.atomic():
fresh_self: DeduplicationSet = (
Expand All @@ -119,23 +121,9 @@ def update_encoding_errors(self, errors: list[ImageEmbeddingError]) -> None:
fresh_self.encoding_errors.update(errors)
fresh_self.save()

# def update_findings(
# self, findings: list[tuple[EntityEmbedding, EntityEmbedding, Score]]
# ) -> None:
# Finding.objects.bulk_create(
# [
# Finding(
# deduplication_set=self,
# first_reference_pk=first_reference_pk,
# second_reference_pk=second_reference_pk,
# score=score,
# )
# for (first_reference_pk, _), (second_reference_pk, _), score in findings
# ],
# ignore_conflicts=True,
# )

def update_findings(self, findings: list[tuple[EntityEmbedding, EntityEmbedding, Score]]) -> None:
def update_findings(
self, findings: list[tuple[EntityEmbedding, EntityEmbedding, Score]]
) -> None:
images = Image.objects.filter(deduplication_set=self).values(
"filename", "reference_pk"
)
Expand All @@ -145,35 +133,30 @@ def update_findings(self, findings: list[tuple[EntityEmbedding, EntityEmbedding,
findings_to_create = [
Finding(
deduplication_set=self,
first_filename=f[0],
first_reference_pk=filename_to_reference_pk.get(f[0]),
second_filename=f[1],
second_reference_pk=filename_to_reference_pk.get(f[1]),
score=f[2],
status_code=f[3],
first_reference_pk=filename_to_reference_pk.get(first_filename),
first_filename=first_filename,
second_reference_pk=filename_to_reference_pk.get(second_filename),
second_filename=second_filename,
score=score,
)
# TODO:
for f in findings
for first_filename, second_filename, score in findings
]
Finding.objects.bulk_create(findings_to_create, ignore_conflicts=True)



def update_finding_errors(
    self, encoding_errors: list[EntityEmbeddingError]
) -> None:
    """Persist face-encoding failures as error ``Finding`` rows.

    Args:
        encoding_errors: triples of ``(reference_pk, filename, error_name)``
            where ``error_name`` must be the name of a ``Finding.StatusCode``
            member (e.g. ``"MULTIPLE_FACES_DETECTED"``) — unknown names raise
            ``KeyError``.  NOTE(review): assumed from the producer in
            ``save_encoding_errors_in_findings`` — confirm the triple shape.
    """
    errors_to_create = [
        Finding(
            deduplication_set=self,
            first_reference_pk=reference_pk,
            first_filename=filename,
            # Enum lookup by member *name*, stored as its value.
            status_code=Finding.StatusCode[error].value,
        )
        for reference_pk, filename, error in encoding_errors
    ]
    # ignore_conflicts=True: rows violating unique constraints are silently
    # skipped, so re-running the task does not duplicate findings.
    Finding.objects.bulk_create(errors_to_create, ignore_conflicts=True)


class Image(models.Model):
Expand Down
44 changes: 0 additions & 44 deletions src/hope_dedup_engine/apps/faces/admin.py

This file was deleted.

12 changes: 6 additions & 6 deletions src/hope_dedup_engine/apps/faces/celery/tasks/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
find_similar_faces,
)
from hope_dedup_engine.config.celery import app
from hope_dedup_engine.constants import FacialError

# from hope_dedup_engine.constants import FacialError
from hope_dedup_engine.types import EntityEmbedding, Filename, SortedTuple
from hope_dedup_engine.utils import compact_pairs
from hope_dedup_engine.utils.celery.task_result import wrapped
Expand Down Expand Up @@ -64,10 +65,8 @@ def get_deduplication_set_embedding_pairs(
)

entity_embeddings = tuple(
(reference_pk, deduplication_set.encodings[filename])
for reference_pk, filename in deduplication_set.image_set.values_list(
"reference_pk", "filename"
)
(filename, deduplication_set.encodings[filename])
for filename in deduplication_set.image_set.values_list("filename", flat=True)
if filename in deduplication_set.encodings
)

Expand Down Expand Up @@ -98,7 +97,8 @@ def save_encoding_errors_in_findings(deduplication_set_id: str) -> None:
pk=deduplication_set_id
)
embedding_errors = [
(reference_pk, FacialError(deduplication_set.encoding_errors[filename]))
# (reference_pk, FacialError(deduplication_set.encoding_errors[filename]))
(reference_pk, filename, deduplication_set.encoding_errors[filename])
for reference_pk, filename in deduplication_set.image_set.values_list(
"reference_pk", "filename"
)
Expand Down
46 changes: 0 additions & 46 deletions src/hope_dedup_engine/apps/faces/celery/tasks/dnn_files.py

This file was deleted.

18 changes: 10 additions & 8 deletions src/hope_dedup_engine/apps/faces/services/facial.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
from collections.abc import Generator, Iterable
from typing import Any, cast

# from hope_dedup_engine.types import EncodingType, FindingType, IgnoredPairType
from deepface import DeepFace

from hope_dedup_engine.apps.api.models import Finding
from hope_dedup_engine.apps.faces.managers import ImagesStorageManager

# from hope_dedup_engine.constants import FacialError
from hope_dedup_engine.types import (
Embedding,
Expand All @@ -14,8 +16,6 @@
ImageEmbedding,
ImageEmbeddingError,
)
from hope_dedup_engine.apps.faces.utils import is_facial_error
# from hope_dedup_engine.types import EncodingType, FindingType, IgnoredPairType

logger = logging.getLogger(__name__)

Expand All @@ -38,7 +38,9 @@ def encode_faces(
try:
result = DeepFace.represent(storage.load_image(filename), **(options or {}))
if len(result) > 1:
errors.append((filename, Finding.StatusCode.MULTIPLE_FACES_DETECTED.name))
errors.append(
(filename, Finding.StatusCode.MULTIPLE_FACES_DETECTED.name)
)
else:
embeddings.append((filename, cast(list[float], result[0]["embedding"])))
except TypeError as e:
Expand All @@ -59,13 +61,13 @@ def face_similarity(first: Embedding, second: Embedding, **options: Any) -> floa


def find_similar_faces(
    embedding_pairs: Iterable[tuple[EntityEmbedding, EntityEmbedding]],
    dedupe_threshold: float,
    options: dict[str, Any],
) -> Generator[tuple[EncodedFace, EncodedFace, float]]:
    """Yield filename pairs whose face embeddings meet the dedupe threshold.

    Args:
        embedding_pairs: pairs of ``(filename, embedding)`` tuples to compare.
        dedupe_threshold: minimum similarity (inclusive) for a pair to be
            reported as a duplicate candidate.
        options: keyword options forwarded verbatim to ``face_similarity``.

    Yields:
        ``(first_filename, second_filename, similarity)`` for every pair
        whose similarity is ``>= dedupe_threshold``.
    """
    for first, second in embedding_pairs:
        first_filename, first_embedding = first
        second_filename, second_embedding = second
        similarity = face_similarity(first_embedding, second_embedding, **options)
        if similarity >= dedupe_threshold:
            yield first_filename, second_filename, similarity
15 changes: 0 additions & 15 deletions src/hope_dedup_engine/apps/faces/utils.py

This file was deleted.

Loading

0 comments on commit 3bebe7b

Please sign in to comment.