Merge branch 'main' into fix/onecoin_ds_fit_predict

Toloka · Jan 14, 2025 · bf68ffc · bf68ffc
2 parents 9b2605c + a90bdca
commit bf68ffc
Show file tree

Hide file tree

Showing 11 changed files with 58 additions and 54 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -4,6 +4,9 @@ on:
   release:
     types: [ published ]
 
+env:
+  UV_SYSTEM_PYTHON: 1
+
 jobs:
   release:
     runs-on: ubuntu-latest
@@ -14,12 +17,14 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.12"
+          python-version: "3.13"
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: false
       - name: Install dependencies
-        run: |
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          uv pip install --system -e '.[dev,docs,learning]'
+        run: uv sync --all-extras
       - name: Build
-        run: python3 -m build --sdist --wheel .
+        run: uv run python3 -m build --sdist --wheel .
       - name: Publish
         uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -2,52 +2,57 @@ name: Tests
 
 on:
   push:
-    branches: [ main ]
   pull_request:
     branches: [ main ]
   workflow_dispatch:
 
+env:
+  UV_SYSTEM_PYTHON: 1
+
 jobs:
   build:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12", "3.13" ]
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python ${{ matrix.python-version }}
+      - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          cache-dependency-glob: |
+            **/pyproject.toml
       - name: Install dependencies
-        run: |
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          uv pip install --system -e '.[dev,docs,learning]'
+        run: uv sync --all-extras
       # NumPy 2.0 is not available for Python 3.8 and older versions, which leads to broken tests
       - name: Lint with Mypy (Python 3.8)
-        run: mypy crowdkit
+        run: uv run mypy crowdkit
         if: matrix.python-version == '3.8'
       - name: Lint with Mypy
-        run: mypy crowdkit tests
+        run: uv run mypy crowdkit tests
         if: matrix.python-version != '3.8'
       # pyupgrade is incompatible with Python 3.8 and older versions
       - name: Pre-Commit Check
         uses: pre-commit/action@v3.0.1
         if: matrix.python-version != '3.8'
       - name: Test with pytest
-        run: coverage run --source crowdkit -m pytest
+        run: uv run coverage run --source crowdkit -m pytest
       - name: Codecov
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
-        run: codecov
-        if: matrix.python-version == '3.12'
+        run: uv run codecov
+        if: matrix.python-version == '3.13'
       - name: Run MkDocs
-        run: mkdocs build --strict
+        run: uv run mkdocs build --strict
       - name: Build wheel
         run: |
-          python3 -m build --sdist --wheel .
-          twine check --strict dist/*
+          uv run python3 -m build --sdist --wheel .
+          uv run twine check --strict dist/*
       - name: Validate CITATION.cff
         uses: citation-file-format/cffconvert-github-action@2.0.0
         with:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,35 +1,35 @@
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.6.0
+  rev: v5.0.0
   hooks:
   - id: trailing-whitespace
   - id: end-of-file-fixer
   - id: check-yaml
   - id: check-added-large-files
 - repo: https://github.com/psf/black
-  rev: 24.8.0
+  rev: 24.10.0
   hooks:
   - id: black
 - repo: https://github.com/pycqa/isort
   rev: 5.13.2
   hooks:
   - id: isort
 - repo: https://github.com/pycqa/flake8
-  rev: 7.0.0
+  rev: 7.1.1
   hooks:
   - id: flake8
 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.17.0
+  rev: v3.19.1
   hooks:
   - id: pyupgrade
 - repo: https://github.com/nbQA-dev/nbQA
-  rev: 1.7.1
+  rev: 1.9.1
   hooks:
    - id: nbqa-black
-     additional_dependencies: [black==24.8.0]
+     additional_dependencies: [black==24.10.0]
    - id: nbqa-isort
      additional_dependencies: [isort==5.13.2]
    - id: nbqa-flake8
-     additional_dependencies: [flake8==7.0.0]
+     additional_dependencies: [flake8==7.1.1]
    - id: nbqa-pyupgrade
-     additional_dependencies: [pyupgrade==3.17.0]
+     additional_dependencies: [pyupgrade==3.19.1]
diff --git a/crowdkit/aggregation/classification/mace.py b/crowdkit/aggregation/classification/mace.py
@@ -313,7 +313,9 @@ def _initialize(self, n_workers: int, n_labels: int) -> None:
             self.theta_priors_[:, 0] = self.alpha
             self.theta_priors_[:, 1] = self.beta
 
-            self.strategy_priors_ = np.ones((n_workers, n_labels)) * 10.0
+            self.strategy_priors_ = np.multiply(
+                10.0, np.ones((n_workers, n_labels)), dtype=np.float64
+            )
 
     def _e_step(
         self,

diff --git a/crowdkit/aggregation/classification/majority_vote.py b/crowdkit/aggregation/classification/majority_vote.py
@@ -23,12 +23,9 @@ class MajorityVote(BaseClassificationAggregator):
     resulting label will have the largest sum of weights.
 
 
-    {% note info %}
-
-     If two or more labels have the largest number of votes, the resulting
-     label will be the same for all tasks that have the same set of labels with the same number of votes.
-
-     {% endnote %}
+    Note:
+        If two or more labels have the largest number of votes, the resulting
+        label will be the same for all tasks that have the same set of labels with the same number of votes.
 
     Examples:
         Basic Majority Vote:

diff --git a/crowdkit/aggregation/classification/zero_based_skill.py b/crowdkit/aggregation/classification/zero_based_skill.py
@@ -20,12 +20,9 @@ class ZeroBasedSkill(BaseClassificationAggregator):
 
     This process is repeated until the labels stop changing or the number of iterations is exceeded.
 
-    {% note info %}
-
-    It is necessary that all workers in the dataset that is sent to `predict` exist in responses to
-    the dataset that was sent to `fit`.
-
-    {% endnote %}
+    Note:
+        It is necessary that all workers in the dataset that is sent to `predict` exist in responses to
+        the dataset that was sent to `fit`.
 
     Examples:
         >>> from crowdkit.aggregation import ZeroBasedSkill

diff --git a/crowdkit/aggregation/image_segmentation/segmentation_rasa.py b/crowdkit/aggregation/image_segmentation/segmentation_rasa.py
@@ -105,7 +105,7 @@ def _aggregate_one(self, segmentations: "pd.Series[Any]") -> npt.NDArray[Any]:
         for _ in range(self.n_iter):
             weighted = self._segmentation_weighted(segmentations_np, weights)
             mv = weighted >= 0.5
-            weights = self._calculate_weights(segmentations_np, mv)
+            weights = self._calculate_weights(segmentations_np, mv)  # type: ignore[assignment,unused-ignore]
 
             if last_aggregated is not None:
                 delta = weighted - last_aggregated

diff --git a/crowdkit/aggregation/multilabel/binary_relevance.py b/crowdkit/aggregation/multilabel/binary_relevance.py
@@ -19,13 +19,11 @@ class BinaryRelevance(BaseClassificationAggregator):
     each label is treated as a class in a binary classification problem and aggregated separately using
     aggregation algorithms for classification, e.g. Majority Vote or Dawid Skene.
 
-    {% note info %}
+    Note:
+        If this method is used for single-label classification, the output of the BinaryRelevance method may differ
+        from the output of the basic aggregator used for its intended purpose, since each class generates a binary
+        classification task, and therefore it is considered separately. For example, some objects may not have labels.
 
-    If this method is used for single-label classification, the output of the BinaryRelevance method may differ
-    from the output of the basic aggregator used for its intended purpose, since each class generates a binary
-    classification task, and therefore it is considered separately. For example, some objects may not have labels.
-
-    {% endnote %}
 
     Examples:
         >>> import pandas as pd

diff --git a/crowdkit/aggregation/utils.py b/crowdkit/aggregation/utils.py
@@ -191,7 +191,7 @@ def converter(series: "pd.Series[Any]") -> "pd.Series[Any]":
         series.name = name
         return series
 
-    return attr.ib(init=False, converter=converter, on_setattr=attr.setters.convert)  # type: ignore[no-any-return]
+    return attr.ib(init=False, converter=converter, on_setattr=attr.setters.convert)
 
 
 def add_skills_to_data(

diff --git a/crowdkit/metrics/data/_classification.py b/crowdkit/metrics/data/_classification.py
@@ -4,7 +4,7 @@
     "alpha_krippendorff",
 ]
 
-from typing import Any, Callable, Hashable, List, Optional, Tuple, Union, cast
+from typing import Any, Callable, Hashable, List, Optional, Union, cast
 
 import numpy as np
 import pandas as pd
@@ -266,7 +266,5 @@ def alpha_krippendorff(
         0.4444444444444444
     """
     _check_answers(answers)
-    data: List[Tuple[Any, Hashable, Hashable]] = answers[
-        ["worker", "task", "label"]
-    ].values.tolist()
+    data = answers[["worker", "task", "label"]].values.tolist()
     return float(AnnotationTask(data, distance).alpha())
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,7 +9,7 @@ keywords = ["crowdsourcing", "data labeling", "answer aggregation", "truth infer
 license = {text = "Apache 2.0"}
 authors = [{name = "Toloka"}]
 classifiers = [
-    "Development Status :: 5 - Production/Stable",
+    "Development Status :: 6 - Mature",
     "Intended Audience :: Developers",
     "Intended Audience :: Science/Research",
     "License :: OSI Approved :: Apache Software License",
@@ -21,6 +21,7 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
     "Programming Language :: Python :: Implementation :: CPython",
     "Topic :: Scientific/Engineering",
     "Topic :: Software Development",
@@ -74,7 +75,8 @@ docs = [
     "mkdocstrings-python"
 ]
 learning = [
-    "torch >= 1.6.0",
+    "torch >= 1.6.0,< 2.5.0; python_version < '3.9'",
+    "torch >= 2.5.0; python_version >= '3.9'",
     "transformers >= 4.0.0"
 ]