Skip to content

Commit

Permalink
chg ! refactor
Browse files (browse the repository at this point in the history)
  • Loading branch information
vitali-yanushchyk-valor committed Jan 28, 2025
1 parent 3bebe7b commit 7ebb0f8
Show file tree
Hide file tree
Showing 12 changed files with 120 additions and 225 deletions.
2 changes: 1 addition & 1 deletion docs/src/did/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ To override only the **FACE_DISTANCE_THRESHOLD** parameter, your custom configur

```json
{
"face_distance_threshols": 0.7
"face_distance_threshold": 0.7
}
```

Expand Down
1 change: 0 additions & 1 deletion src/hope_dedup_engine/apps/api/deduplication/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from celery import Task

# from hope_dedup_engine.apps.api.deduplication.config import DeduplicationSetConfig
from hope_dedup_engine.apps.api.models import DedupJob, DeduplicationSet, Finding
from hope_dedup_engine.apps.api.models.config import DeduplicationSetConfig
from hope_dedup_engine.apps.api.utils.notification import send_notification
Expand Down
20 changes: 0 additions & 20 deletions src/hope_dedup_engine/apps/api/deduplication/registry.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by Django 5.1.5 on 2025-01-27 08:34
# Generated by Django 5.1.5 on 2025-01-28 08:54

import django.db.models.deletion
import hope_dedup_engine.apps.api.validators
from django.db import migrations, models

Expand Down Expand Up @@ -28,4 +29,14 @@ class Migration(migrations.Migration):
],
),
),
migrations.AlterField(
model_name="deduplicationset",
name="config",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="api.config",
),
),
]
81 changes: 4 additions & 77 deletions src/hope_dedup_engine/apps/api/models/deduplication.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from itertools import chain
from typing import Any, Final, override
from uuid import uuid4

Expand All @@ -7,18 +6,7 @@
from django.db import models, transaction

from hope_dedup_engine.apps.security.models import ExternalSystem
from hope_dedup_engine.types import (
Embedding,
EntityEmbedding,
EntityEmbeddingError,
Filename,
Finding,
IgnoredPair,
ImageEmbedding,
ImageEmbeddingError,
Score,
SortedTuple,
)
from hope_dedup_engine.types import ImageEmbedding, ImageEmbeddingError

REFERENCE_PK_LENGTH: Final[int] = 100

Expand Down Expand Up @@ -65,7 +53,9 @@ class State(models.IntegerChoices):
)
updated_at = models.DateTimeField(auto_now=True)
notification_url = models.CharField(max_length=255, null=True, blank=True)
config = models.ForeignKey("Config", null=True, on_delete=models.SET_NULL)
config = models.ForeignKey(
"Config", null=True, blank=True, on_delete=models.SET_NULL
)

# TODO: rename to embeddings, as it's the more accurate term
encodings = models.JSONField(
Expand All @@ -79,31 +69,6 @@ class State(models.IntegerChoices):
def __str__(self) -> str:
return self.name or f"ID: {self.pk}"

def get_encodings(self) -> dict[Filename, Embedding]:
return self.encodings

def get_findings(self) -> list[Finding]:
return list(
self.finding_set.values_list(
"first_reference_pk", "second_reference_pk", "score"
)
)

def get_ignored_pairs(self) -> set[IgnoredPair]:
return set(
chain(
map(
SortedTuple,
self.ignoredreferencepkpair_set.values_list("first", "second"),
),
map(
SortedTuple,
list(self.ignoredfilenamepair_set.values_list("first", "second")),
),
)
)

@transaction.atomic
def update_encodings(self, encodings: list[ImageEmbedding]) -> None:
with transaction.atomic():
fresh_self: DeduplicationSet = (
Expand All @@ -112,7 +77,6 @@ def update_encodings(self, encodings: list[ImageEmbedding]) -> None:
fresh_self.encodings.update(encodings)
fresh_self.save()

@transaction.atomic
def update_encoding_errors(self, errors: list[ImageEmbeddingError]) -> None:
with transaction.atomic():
fresh_self: DeduplicationSet = (
Expand All @@ -121,43 +85,6 @@ def update_encoding_errors(self, errors: list[ImageEmbeddingError]) -> None:
fresh_self.encoding_errors.update(errors)
fresh_self.save()

def update_findings(
self, findings: list[tuple[EntityEmbedding, EntityEmbedding, Score]]
) -> None:
images = Image.objects.filter(deduplication_set=self).values(
"filename", "reference_pk"
)
filename_to_reference_pk = {
img["filename"]: img["reference_pk"] for img in images
} | {"": ""}
findings_to_create = [
Finding(
deduplication_set=self,
first_reference_pk=filename_to_reference_pk.get(first_filename),
first_filename=first_filename,
second_reference_pk=filename_to_reference_pk.get(second_filename),
second_filename=second_filename,
score=score,
)
for first_filename, second_filename, score in findings
]
Finding.objects.bulk_create(findings_to_create, ignore_conflicts=True)

def update_finding_errors(
self, encoding_errors: list[EntityEmbeddingError]
) -> None:

errors_to_create = [
Finding(
deduplication_set=self,
first_reference_pk=reference_pk,
first_filename=filename,
status_code=Finding.StatusCode[error].value,
)
for reference_pk, filename, error in encoding_errors
]
Finding.objects.bulk_create(errors_to_create, ignore_conflicts=True)


class Image(models.Model):
"""
Expand Down
60 changes: 0 additions & 60 deletions src/hope_dedup_engine/apps/api/utils/shema_manager.py

This file was deleted.

12 changes: 6 additions & 6 deletions src/hope_dedup_engine/apps/faces/celery/tasks/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from hope_dedup_engine.apps.faces.services.facial import (
encode_faces,
find_similar_faces,
get_ignored_pairs,
update_finding_errors,
update_findings,
)
from hope_dedup_engine.config.celery import app

# from hope_dedup_engine.constants import FacialError
from hope_dedup_engine.types import EntityEmbedding, Filename, SortedTuple
from hope_dedup_engine.utils import compact_pairs
from hope_dedup_engine.utils.celery.task_result import wrapped
Expand Down Expand Up @@ -46,7 +47,7 @@ def filter_ignored_pairs(
embedding_pairs: Iterable[tuple[EntityEmbedding, EntityEmbedding]],
deduplication_set: DeduplicationSet,
) -> Generator[tuple[EntityEmbedding, EntityEmbedding], None, None]:
ignored_pairs = deduplication_set.get_ignored_pairs()
ignored_pairs = get_ignored_pairs(deduplication_set)
for embedding_pair in embedding_pairs:
first, second = embedding_pair
first_reference_pk, _ = first
Expand Down Expand Up @@ -87,7 +88,7 @@ def find_duplicates(
dedupe_threshold=deduplicate_config.get("threshold"),
options=deduplicate_config,
)
deduplication_set.update_findings(findings)
update_findings(deduplication_set, findings)


@app.task
Expand All @@ -97,11 +98,10 @@ def save_encoding_errors_in_findings(deduplication_set_id: str) -> None:
pk=deduplication_set_id
)
embedding_errors = [
# (reference_pk, FacialError(deduplication_set.encoding_errors[filename]))
(reference_pk, filename, deduplication_set.encoding_errors[filename])
for reference_pk, filename in deduplication_set.image_set.values_list(
"reference_pk", "filename"
)
if filename in deduplication_set.encoding_errors
]
deduplication_set.update_finding_errors(embedding_errors)
update_finding_errors(deduplication_set, embedding_errors)
8 changes: 0 additions & 8 deletions src/hope_dedup_engine/apps/faces/models.py

This file was deleted.

Loading

0 comments on commit 7ebb0f8

Please sign in to comment.