Skip to content

Commit

Permalink
Merge branch 'main' into fix/onecoin_ds_fit_predict
Browse files Browse the repository at this point in the history
  • Loading branch information
shenxiangzhuang committed Jan 14, 2025
2 parents 9b2605c + a90bdca commit bf68ffc
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 54 deletions.
15 changes: 10 additions & 5 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ on:
release:
types: [ published ]

env:
UV_SYSTEM_PYTHON: 1

jobs:
release:
runs-on: ubuntu-latest
Expand All @@ -14,12 +17,14 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
python-version: "3.13"
- name: Set up uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: false
- name: Install dependencies
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
uv pip install --system -e '.[dev,docs,learning]'
run: uv sync --all-extras
- name: Build
run: python3 -m build --sdist --wheel .
run: uv run python3 -m build --sdist --wheel .
- name: Publish
uses: pypa/gh-action-pypi-publish@release/v1
33 changes: 19 additions & 14 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,57 @@ name: Tests

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
workflow_dispatch:

env:
UV_SYSTEM_PYTHON: 1

jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12", "3.13" ]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Set up uv
uses: astral-sh/setup-uv@v5
with:
cache-dependency-glob: |
**/pyproject.toml
- name: Install dependencies
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
uv pip install --system -e '.[dev,docs,learning]'
run: uv sync --all-extras
# NumPy 2.0 is not available for Python 3.8 and older versions, which leads to broken tests
- name: Lint with Mypy (Python 3.8)
run: mypy crowdkit
run: uv run mypy crowdkit
if: matrix.python-version == '3.8'
- name: Lint with Mypy
run: mypy crowdkit tests
run: uv run mypy crowdkit tests
if: matrix.python-version != '3.8'
# pyupgrade is incompatible with Python 3.8 and older versions
- name: Pre-Commit Check
uses: pre-commit/[email protected]
if: matrix.python-version != '3.8'
- name: Test with pytest
run: coverage run --source crowdkit -m pytest
run: uv run coverage run --source crowdkit -m pytest
- name: Codecov
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: codecov
if: matrix.python-version == '3.12'
run: uv run codecov
if: matrix.python-version == '3.13'
- name: Run MkDocs
run: mkdocs build --strict
run: uv run mkdocs build --strict
- name: Build wheel
run: |
python3 -m build --sdist --wheel .
twine check --strict dist/*
uv run python3 -m build --sdist --wheel .
uv run twine check --strict dist/*
- name: Validate CITATION.cff
uses: citation-file-format/[email protected]
with:
Expand Down
16 changes: 8 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,35 +1,35 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 24.8.0
rev: 24.10.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/pycqa/flake8
rev: 7.0.0
rev: 7.1.1
hooks:
- id: flake8
- repo: https://github.com/asottile/pyupgrade
rev: v3.17.0
rev: v3.19.1
hooks:
- id: pyupgrade
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.7.1
rev: 1.9.1
hooks:
- id: nbqa-black
additional_dependencies: [black==24.8.0]
additional_dependencies: [black==24.10.0]
- id: nbqa-isort
additional_dependencies: [isort==5.13.2]
- id: nbqa-flake8
additional_dependencies: [flake8==7.0.0]
additional_dependencies: [flake8==7.1.1]
- id: nbqa-pyupgrade
additional_dependencies: [pyupgrade==3.17.0]
additional_dependencies: [pyupgrade==3.19.1]
4 changes: 3 additions & 1 deletion crowdkit/aggregation/classification/mace.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,9 @@ def _initialize(self, n_workers: int, n_labels: int) -> None:
self.theta_priors_[:, 0] = self.alpha
self.theta_priors_[:, 1] = self.beta

self.strategy_priors_ = np.ones((n_workers, n_labels)) * 10.0
self.strategy_priors_ = np.multiply(
10.0, np.ones((n_workers, n_labels)), dtype=np.float64
)

def _e_step(
self,
Expand Down
9 changes: 3 additions & 6 deletions crowdkit/aggregation/classification/majority_vote.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,9 @@ class MajorityVote(BaseClassificationAggregator):
resulting label will have the largest sum of weights.
{% note info %}
If two or more labels have the largest number of votes, the resulting
label will be the same for all tasks that have the same set of labels with the same number of votes.
{% endnote %}
Note:
If two or more labels have the largest number of votes, the resulting
label will be the same for all tasks that have the same set of labels with the same number of votes.
Examples:
Basic Majority Vote:
Expand Down
9 changes: 3 additions & 6 deletions crowdkit/aggregation/classification/zero_based_skill.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,9 @@ class ZeroBasedSkill(BaseClassificationAggregator):
This process is repeated until the labels stop changing or the number of iterations is exceeded.
{% note info %}
It is necessary that all workers in the dataset that is sent to `predict` exist in responses to
the dataset that was sent to `fit`.
{% endnote %}
Note:
It is necessary that all workers in the dataset that is sent to `predict` exist in responses to
the dataset that was sent to `fit`.
Examples:
>>> from crowdkit.aggregation import ZeroBasedSkill
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _aggregate_one(self, segmentations: "pd.Series[Any]") -> npt.NDArray[Any]:
for _ in range(self.n_iter):
weighted = self._segmentation_weighted(segmentations_np, weights)
mv = weighted >= 0.5
weights = self._calculate_weights(segmentations_np, mv)
weights = self._calculate_weights(segmentations_np, mv) # type: ignore[assignment,unused-ignore]

if last_aggregated is not None:
delta = weighted - last_aggregated
Expand Down
10 changes: 4 additions & 6 deletions crowdkit/aggregation/multilabel/binary_relevance.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@ class BinaryRelevance(BaseClassificationAggregator):
each label is treated as a class in a binary classification problem and aggregated separately using
aggregation algorithms for classification, e.g. Majority Vote or Dawid Skene.
{% note info %}
Note:
If this method is used for single-label classification, the output of the BinaryRelevance method may differ
from the output of the basic aggregator used for its intended purpose, since each class generates a binary
classification task, and therefore it is considered separately. For example, some objects may not have labels.
If this method is used for single-label classification, the output of the BinaryRelevance method may differ
from the output of the basic aggregator used for its intended purpose, since each class generates a binary
classification task, and therefore it is considered separately. For example, some objects may not have labels.
{% endnote %}
Examples:
>>> import pandas as pd
Expand Down
2 changes: 1 addition & 1 deletion crowdkit/aggregation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def converter(series: "pd.Series[Any]") -> "pd.Series[Any]":
series.name = name
return series

return attr.ib(init=False, converter=converter, on_setattr=attr.setters.convert) # type: ignore[no-any-return]
return attr.ib(init=False, converter=converter, on_setattr=attr.setters.convert)


def add_skills_to_data(
Expand Down
6 changes: 2 additions & 4 deletions crowdkit/metrics/data/_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"alpha_krippendorff",
]

from typing import Any, Callable, Hashable, List, Optional, Tuple, Union, cast
from typing import Any, Callable, Hashable, List, Optional, Union, cast

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -266,7 +266,5 @@ def alpha_krippendorff(
0.4444444444444444
"""
_check_answers(answers)
data: List[Tuple[Any, Hashable, Hashable]] = answers[
["worker", "task", "label"]
].values.tolist()
data = answers[["worker", "task", "label"]].values.tolist()
return float(AnnotationTask(data, distance).alpha())
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ keywords = ["crowdsourcing", "data labeling", "answer aggregation", "truth infer
license = {text = "Apache 2.0"}
authors = [{name = "Toloka"}]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Development Status :: 6 - Mature",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
Expand All @@ -21,6 +21,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
"Topic :: Scientific/Engineering",
"Topic :: Software Development",
Expand Down Expand Up @@ -74,7 +75,8 @@ docs = [
"mkdocstrings-python"
]
learning = [
"torch >= 1.6.0",
"torch >= 1.6.0,< 2.5.0; python_version < '3.9'",
"torch >= 2.5.0; python_version >= '3.9'",
"transformers >= 4.0.0"
]

Expand Down

0 comments on commit bf68ffc

Please sign in to comment.