diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index 9e72bd321d..00c7dc2f14 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -1,5 +1,5 @@ trigger: -- master +- main - "*.*.x" variables: @@ -52,21 +52,21 @@ jobs: - script: | python -m pip install --upgrade pip - pip install wheel coverage + pip install wheel pip install .[dev,$(TEST_EXTRA)] displayName: 'Install dependencies' condition: eq(variables['DEPENDENCIES_VERSION'], 'latest') - script: | python -m pip install --pre --upgrade pip - pip install --pre wheel coverage + pip install --pre wheel pip install --pre .[dev,$(TEST_EXTRA)] pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata" displayName: 'Install dependencies release candidates' condition: eq(variables['DEPENDENCIES_VERSION'], 'pre-release') - script: | - python -m pip install pip wheel tomli packaging pytest-cov + python -m pip install pip wheel tomli packaging pip install `python3 ci/scripts/min-deps.py pyproject.toml --extra dev test` pip install --no-deps . displayName: 'Install dependencies minimum version' @@ -81,8 +81,7 @@ jobs: condition: eq(variables['TEST_TYPE'], 'standard') - script: | - coverage run -m pytest - coverage xml + pytest --cov --cov-report=xml --cov-context=test displayName: 'PyTest (coverage)' condition: eq(variables['TEST_TYPE'], 'coverage') diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 38fa7a0d4b..7e94695e47 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -14,7 +14,7 @@ body: required: true - label: I have confirmed this bug exists on the latest version of scanpy. required: true - - label: (optional) I have confirmed this bug exists on the master branch of scanpy. + - label: (optional) I have confirmed this bug exists on the main branch of scanpy. 
required: false - type: markdown attributes: diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index a0c4b12e00..1505f196f5 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ -blank_issues_enabled: false +blank_issues_enabled: true contact_links: - name: Scanpy Community Forum url: https://discourse.scverse.org/ diff --git a/.github/workflows/check-pr.yml b/.github/workflows/check-pr.yml index 21a1f361df..e74c041c8b 100644 --- a/.github/workflows/check-pr.yml +++ b/.github/workflows/check-pr.yml @@ -4,7 +4,6 @@ on: pull_request: branches: - main - - master types: # milestone changes - milestoned diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 18ea71fc98..709cc29b1a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.1 + rev: v0.2.2 hooks: - id: ruff types_or: [python, pyi, jupyter] @@ -26,7 +26,7 @@ repos: - id: check-merge-conflict - id: detect-private-key - id: no-commit-to-branch - args: ["--branch=master", "--branch=main"] + args: ["--branch=main"] ci: autofix_prs: false diff --git a/README.md b/README.md index 219f51a5ca..53c4c35706 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Downloads](https://static.pepy.tech/badge/scanpy)](https://pepy.tech/project/scanpy) [![Conda](https://img.shields.io/conda/dn/conda-forge/scanpy?logo=Anaconda)](https://anaconda.org/conda-forge/scanpy) [![Docs](https://readthedocs.com/projects/icb-scanpy/badge/?version=latest)](https://scanpy.readthedocs.io) -[![Build Status](https://dev.azure.com/scverse/scanpy/_apis/build/status/theislab.scanpy?branchName=master)](https://dev.azure.com/scverse/scanpy/_build) +[![Build Status](https://dev.azure.com/scverse/scanpy/_apis/build/status/theislab.scanpy?branchName=main)](https://dev.azure.com/scverse/scanpy/_build) [![Discourse topics](https://img.shields.io/discourse/posts?color=yellow&logo=discourse&server=https%3A%2F%2Fdiscourse.scverse.org)](https://discourse.scverse.org/) [![Chat](https://img.shields.io/badge/zulip-join_chat-%2367b08f.svg)](https://scverse.zulipchat.com) [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org/) diff --git a/docs/api/get.md b/docs/api/get.md index 82f74ed411..039769d0a6 100644 --- a/docs/api/get.md +++ b/docs/api/get.md @@ -19,5 +19,6 @@ useful formats. get.obs_df get.var_df get.rank_genes_groups_df + get.aggregate ``` diff --git a/docs/dev/code.md b/docs/dev/code.md index 61fffb31cb..06c267951b 100644 --- a/docs/dev/code.md +++ b/docs/dev/code.md @@ -20,5 +20,5 @@ We ignore a couple of flake8 checks which are documented in the .flake8 file in To learn how to ignore checks per line please read [flake8 violations](https://flake8.pycqa.org/en/latest/user/violations.html). Additionally, we use Scanpy’s -[EditorConfig](https://github.com/scverse/scanpy/blob/master/.editorconfig), +[EditorConfig](https://github.com/scverse/scanpy/blob/main/.editorconfig), so using an editor/IDE with support for both is helpful. diff --git a/docs/dev/getting-set-up.md b/docs/dev/getting-set-up.md index b7124303f5..750af53e91 100644 --- a/docs/dev/getting-set-up.md +++ b/docs/dev/getting-set-up.md @@ -21,7 +21,7 @@ This is very straight forward if you're using [GitHub's CLI](https://cli.github. 
$ gh repo fork scverse/scanpy --clone --remote ``` -This will fork the repo to your github account, create a clone of the repo on your current machine, add our repository as a remote, and set the `master` development branch to track our repository. +This will fork the repo to your github account, create a clone of the repo on your current machine, add our repository as a remote, and set the `main` development branch to track our repository. To do this manually, first make a fork of the repository by clicking the "fork" button on our main github package. Then, on your machine, run: @@ -32,7 +32,7 @@ git clone https://github.com/{your-username}/scanpy.git cd scanpy # Add our repository as a remote git remote add upstream https://github.com/scverse/scanpy.git -# git branch --set-upstream-to "upstream/master" +# git branch --set-upstream-to "upstream/main" ``` ### `pre-commit` @@ -61,11 +61,11 @@ If you choose not to run the hooks on each commit, you can run them manually wit ### Creating a branch for your feature All development should occur in branches dedicated to the particular work being done. -Additionally, unless you are a maintainer, all changes should be directed at the `master` branch. +Additionally, unless you are a maintainer, all changes should be directed at the `main` branch. You can create a branch with: ```shell -git checkout master # Starting from the master branch +git checkout main # Starting from the main branch git pull # Syncing with the repo git checkout -b {your-branch-name} # Making and changing to the new branch ``` diff --git a/docs/dev/release.md b/docs/dev/release.md index def899ac67..f4bcb528a6 100644 --- a/docs/dev/release.md +++ b/docs/dev/release.md @@ -67,4 +67,4 @@ If you want to replicate the process more exactly, make sure you are careful, and create a version tag before building (make sure you delete it after uploading to TestPyPI!). [hatch-build]: https://hatch.pypa.io/latest/config/build/ -[publish workflow]: https://github.com/scverse/scanpy/tree/master/.github/workflows/publish.yml +[publish workflow]: https://github.com/scverse/scanpy/tree/main/.github/workflows/publish.yml diff --git a/docs/dev/testing.md b/docs/dev/testing.md index e0297cff32..46ab9f129c 100644 --- a/docs/dev/testing.md +++ b/docs/dev/testing.md @@ -21,7 +21,7 @@ It can take a while to run the whole test suite. There are a few ways to cut dow ## Writing tests -You can refer to the [existing test suite](https://github.com/scverse/scanpy/tree/master/scanpy/tests) for examples. +You can refer to the [existing test suite](https://github.com/scverse/scanpy/tree/main/scanpy/tests) for examples. If you haven't written tests before, Software Carpentry has an [in-depth guide](https://katyhuff.github.io/2016-07-11-scipy/testing/01-basics.html) on the topic. We highly recommend using [Test Driven Development](https://en.wikipedia.org/wiki/Test-driven_development) when contributing code. 
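For orientation alongside the testing guide above, here is a minimal sketch of the shape a test in `scanpy/tests/` could take; the function name, the synthetic data, and the exact assertion are illustrative assumptions rather than something taken from the existing suite or this patch:

```python
import numpy as np
from anndata import AnnData

import scanpy as sc


def test_normalize_total_sums_to_target():
    # tiny deterministic count matrix so the test needs no downloads
    rng = np.random.default_rng(0)
    counts = (rng.poisson(1.0, size=(20, 10)) + 1).astype(np.float32)
    adata = AnnData(counts)

    sc.pp.normalize_total(adata, target_sum=1.0)

    # after normalization, every cell (row) should sum to the requested total
    np.testing.assert_allclose(adata.X.sum(axis=1), 1.0, rtol=1e-6)
```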
diff --git a/docs/extensions/git_ref.py b/docs/extensions/git_ref.py index 1f57de7a25..dd61f0cce0 100644 --- a/docs/extensions/git_ref.py +++ b/docs/extensions/git_ref.py @@ -32,7 +32,7 @@ def get() -> str | None: try: git_ref = git("rev-parse", "HEAD") except Exception: - git_ref = "master" + git_ref = "main" return git_ref diff --git a/docs/release-notes/1.10.0.md b/docs/release-notes/1.10.0.md index 71ab684386..1b338fd5ed 100644 --- a/docs/release-notes/1.10.0.md +++ b/docs/release-notes/1.10.0.md @@ -1,50 +1,55 @@ -### 1.10.0 {small}`the future` +### 1.10.0rc2 {small}`2024-02-22` + +```{rubric} Bug fixes +``` + +* Fix pytest deprecation warning {pr}`2879` {smaller}`P Angerer` + +### 1.10.0rc1 {small}`2024-02-22` ```{rubric} Features ``` +* {func}`~scanpy.pp.scrublet` and {func}`~scanpy.pp.scrublet_simulate_doublets` were moved from {mod}`scanpy.external.pp` to {mod}`scanpy.pp`. The `scrublet` implementation is now maintained as part of scanpy {pr}`2703` {smaller}`P Angerer` +* {func}`scanpy.pp.pca`, {func}`scanpy.pp.scale`, {func}`scanpy.pl.embedding`, and {func}`scanpy.experimental.pp.normalize_pearson_residuals_pca` now support a `mask` parameter {pr}`2272` {smaller}`C Bright, T Marcella, & P Angerer` +* Enhanced dask support for some internal utilities, paving the way for more extensive dask support {pr}`2696` {smaller}`P Angerer` +* {func}`scanpy.pp.highly_variable_genes` supports dask for the default `seurat` and `cell_ranger` flavors {pr}`2809` {smaller}`P Angerer` +* New function {func}`scanpy.get.aggregate` which allows grouped aggregations over your data. Useful for pseudobulking! {pr}`2590` {smaller}`Isaac Virshup` {smaller}`Ilan Gold` {smaller}`Jon Bloom` +* {func}`scanpy.pp.neighbors` now has a `transformer` argument allowing the use of different ANN/KNN libraries {pr}`2536` {smaller}`P Angerer` +* {func}`scanpy.experimental.pp.highly_variable_genes` using `flavor='pearson_residuals'` now uses numba for variance computation and is faster {pr}`2612` {smaller}`S Dicks & P Angerer` +* {func}`scanpy.tl.leiden` now offers `igraph`'s implementation of the leiden algorithm via `flavor` when set to `igraph`. `leidenalg`'s implementation is still the default, but discouraged. {pr}`2815` {smaller}`I Gold` +* {func}`scanpy.pp.highly_variable_genes` has a new flavor `seurat_v3_paper` whose implementation is consistent with the paper description in Stuart et al 2018. {pr}`2792` {smaller}`E Roellin` * {func}`scanpy.datasets.blobs` now accepts a `random_state` argument {pr}`2683` {smaller}`E Roellin` * {func}`scanpy.pp.pca` and {func}`scanpy.pp.regress_out` now accept a layer argument {pr}`2588` {smaller}`S Dicks` * {func}`scanpy.pp.subsample` with `copy=True` can now be called in backed mode {pr}`2624` {smaller}`E Roellin` -* {func}`scanpy.pp.neighbors` now has a `transformer` argument allowing for more flexibility {pr}`2536` {smaller}`P Angerer` -* {func}`scanpy.experimental.pp.highly_variable_genes` using `flavor='pearson_residuals'` - now uses numba for variance computation {pr}`2612` {smaller}`S Dicks & P Angerer` * {func}`scanpy.external.pp.harmony_integrate` now runs with 64 bit floats improving reproducibility {pr}`2655` {smaller}`S Dicks` -* {func}`~scanpy.pp.scrublet` and {func}`~scanpy.pp.scrublet_simulate_doublets` were moved from {mod}`scanpy.external.pp` to {mod}`scanpy.pp`.
- The `scrublet` implementation is now maintained as part of scanpy {pr}`2703` {smaller}`P Angerer` -* Enhanced dask support for some internal utilities, paving the way for more extensive dask support {pr}`2696` {smaller}`P Angerer` -* {func}`scanpy.pp.pca`, {func}`scanpy.pp.scale`, {func}`scanpy.pl.embedding`, and {func}`scanpy.experimental.pp.normalize_pearson_residuals_pca` - now support a `mask` parameter {pr}`2272` {smaller}`C Bright, T Marcella, & P Angerer` * {func}`scanpy.tl.rank_genes_groups` no longer warns that its default was changed from t-test_overestim_var to t-test {pr}`2798` {smaller}`L Heumos` -* {func}`scanpy.pp.highly_variable_genes` has new flavor `seurat_v3_paper` that is in its implementation consistent with the paper description in Stuart et al 2018. {pr}`2792` {smaller}`E Roellin` -* {func}`scanpy.pp.highly_variable_genes` supports dask for the default `seurat` and `cell_ranger` flavors {pr}`2809` {smaller}`P Angerer` -* Auto conversion of strings to collections in `scanpy.pp.calculate_qc_metrics` {pr}`2859` {smaller}`N Teyssier` +* `scanpy.pp.calculate_qc_metrics` now allows `qc_vars` to be passed as a string {pr}`2859` {smaller}`N Teyssier` ```{rubric} Docs ``` + +* Re-add search-as-you-type, this time via `readthedocs-sphinx-search` {pr}`2805` {smaller}`P Angerer` * Fixed a lot of broken usage examples {pr}`2605` {smaller}`P Angerer` * Improved harmonization of return field of `sc.pp` and `sc.tl` functions {pr}`2742` {smaller}`E Roellin` -* Re-add search-as-you-type, this time via `readthedocs-sphinx-search` {pr}`2805` {smaller}`P Angerer` * Improved docs for `percent_top` argument of {func}`~scanpy.pp.calculate_qc_metrics` {pr}`2849` {smaller}`I Virshup` ```{rubric} Bug fixes ``` * Updated {func}`~scanpy.read_visium` such that it can read spaceranger 2.0 files {smaller}`L Lehner` -* Fix {func}`~scanpy.pp.normalize_total` {pr}`2466` {smaller}`P Angerer` -* Fix testing package build {pr}`2468` {smaller}`P Angerer` +* Fix {func}`~scanpy.pp.normalize_total` for dask {pr}`2466` {smaller}`P Angerer` * Fix setting `sc.settings.verbosity` in some cases {pr}`2605` {smaller}`P Angerer` * Fix all remaining pandas warnings {pr}`2789` {smaller}`P Angerer` * Fix some annoying plotting warnings around violin plots {pr}`2844` {smaller}`P Angerer` * Scanpy now has a test job which tests against the minimum versions of the dependencies. In the process of implementing this, many bugs associated with using older versions of `pandas`, `anndata`, `numpy`, and `matplotlib` were fixed. {pr}`2816` {smaller}`I Virshup` +* Fix warnings caused by internal usage of `pandas.DataFrame.stack` with `pandas>=2.1` {pr}`2864` {smaller}`I Virshup` ```{rubric} Development ``` * Scanpy is now tested against python 3.12 {pr}`2863` {smaller}`ivirshup` - -```{rubric} Ecosystem -``` +* Fix testing package build {pr}`2468` {smaller}`P Angerer` ```{rubric} Deprecations ``` @@ -52,3 +57,4 @@ * Dropped support for Python 3.8. [More details here](https://numpy.org/neps/nep-0029-deprecation_policy.html). {pr}`2695` {smaller}`P Angerer` * Deprecated specifying large numbers of function parameters by position as opposed to by name/keyword in all public APIs. e.g. prefer `sc.tl.umap(adata, min_dist=0.1, spread=0.8)` over `sc.tl.umap(adata, 0.1, 0.8)` {pr}`2702` {smaller}`P Angerer` +* Dropped support for `umap<0.5` for performance reasons.
{pr}`2870` {smaller}`P Angerer` diff --git a/docs/release-notes/1.11.0.md b/docs/release-notes/1.11.0.md new file mode 100644 index 0000000000..fb6c33a443 --- /dev/null +++ b/docs/release-notes/1.11.0.md @@ -0,0 +1,13 @@ +### 1.11.0 {small}`the future` + +```{rubric} Features +``` + +```{rubric} Docs +``` + +```{rubric} Bug fixes +``` + +```{rubric} Deprecations +``` diff --git a/docs/release-notes/index.md b/docs/release-notes/index.md index 72160d4eac..0dd4e2a020 100644 --- a/docs/release-notes/index.md +++ b/docs/release-notes/index.md @@ -2,6 +2,11 @@ # Release notes +## Version 1.11 + +```{include} /release-notes/1.11.0.md +``` + ## Version 1.10 ```{include} /release-notes/1.10.0.md diff --git a/pyproject.toml b/pyproject.toml index 9046317d07..3acaed64b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ dependencies = [ "natsort", "joblib", "numba>=0.56", - "umap-learn>=0.3.10", + "umap-learn>=0.5,!=0.5.0", "pynndescent>=0.5", "packaging>=21.3", "session-info", @@ -87,6 +87,7 @@ test-min = [ "pytest>=7.4.2", "pytest-nunit", "pytest-mock", + "pytest-cov", "profimp", ] test = [ @@ -159,7 +160,6 @@ addopts = [ "--import-mode=importlib", "--strict-markers", "--doctest-modules", - "-pscanpy.testing._pytest", ] testpaths = ["scanpy"] norecursedirs = ["scanpy/tests/_images"] diff --git a/scanpy/__init__.py b/scanpy/__init__.py index d6a2e46770..be2a10f5bf 100644 --- a/scanpy/__init__.py +++ b/scanpy/__init__.py @@ -1,6 +1,8 @@ """Single-Cell Analysis in Python.""" from __future__ import annotations +import sys + try: # See https://github.com/maresb/hatch-vcs-footgun-example from setuptools_scm import get_version @@ -21,6 +23,11 @@ # the actual API # (start with settings as several tools are using it) + +from ._settings import Verbosity, settings + +set_figure_params = settings.set_figure_params + from anndata import ( AnnData, concat, @@ -38,15 +45,10 @@ from . import plotting as pl from . import preprocessing as pp from . import tools as tl -from ._settings import Verbosity, settings from .neighbors import Neighbors from .readwrite import read, read_10x_h5, read_10x_mtx, read_visium, write -set_figure_params = settings.set_figure_params - # has to be done at the end, after everything has been imported -import sys - sys.modules.update({f"{__name__}.{m}": globals()[m] for m in ["tl", "pp", "pl"]}) from ._utils import annotate_doc_types diff --git a/scanpy/_utils/__init__.py b/scanpy/_utils/__init__.py index 6d21a11fd5..e182ab3840 100644 --- a/scanpy/_utils/__init__.py +++ b/scanpy/_utils/__init__.py @@ -7,9 +7,11 @@ import importlib.util import inspect +import random import sys import warnings from collections import namedtuple +from contextlib import contextmanager from enum import Enum from functools import partial, singledispatch, wraps from textwrap import dedent @@ -20,10 +22,10 @@ import numpy as np from anndata import AnnData from anndata import __version__ as anndata_version -from numpy import random from numpy.typing import NDArray from packaging import version from scipy import sparse +from sklearn.utils import check_random_state from .. import logging as logg from .._compat import DaskArray @@ -45,16 +47,43 @@ def __repr__(self) -> str: _empty = Empty.token # e.g. 
https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html -AnyRandom = Union[int, random.RandomState, None] # maybe in the future random.Generator +# maybe in the future random.Generator +AnyRandom = Union[int, np.random.RandomState, None] -EPS = 1e-15 +class RNGIgraph: + """ + Random number generator for igraph so the global seed is not changed. + See :func:`igraph.set_random_number_generator` for the requirements. + """ + + def __init__(self, random_state: int = 0) -> None: + self._rng = check_random_state(random_state) -def check_versions(): - from .._compat import pkg_version + def __getattr__(self, attr: str): + return getattr(self._rng, "normal" if attr == "gauss" else attr) + + +@contextmanager +def set_igraph_random_state(random_state: int): + try: + import igraph + except ImportError: + raise ImportError( + "Please install igraph: `conda install -c conda-forge igraph` or `pip3 install igraph`." + ) + rng = RNGIgraph(random_state) + try: + igraph.set_random_number_generator(rng) + yield None + finally: + igraph.set_random_number_generator(random) - umap_version = pkg_version("umap-learn") +EPS = 1e-15 + + +def check_versions(): if version.parse(anndata_version) < version.parse("0.6.10"): from .. import __version__ @@ -63,15 +92,6 @@ f"not {anndata_version}.\nRun `pip install anndata -U --no-deps`." ) - if umap_version < version.parse("0.3.0"): - from . import __version__ - - # make this a warning, not an error - # it might be useful for people to still be able to run it - logg.warning( - f"Scanpy {__version__} needs umap " f"version >=0.3.0, not {umap_version}." - ) - def getdoc(c_or_f: Callable | type) -> str | None: if getattr(c_or_f, "__doc__", None) is None: @@ -459,10 +479,10 @@ def moving_average(a: np.ndarray, n: int): return ret[n - 1 :] / n -def get_random_state(seed: AnyRandom) -> random.RandomState: +def get_random_state(seed: AnyRandom) -> np.random.RandomState: if isinstance(seed, np.random.RandomState): return seed - return random.RandomState(seed) + return np.random.RandomState(seed) # -------------------------------------------------------------------------------- @@ -841,3 +861,13 @@ def _choose_graph(adata, obsp, neighbors_key): "to compute a neighborhood graph."
) return neighbors["connectivities"] + + +def _resolve_axis( + axis: Literal["obs", 0, "var", 1], +) -> tuple[Literal[0], Literal["obs"]] | tuple[Literal[1], Literal["var"]]: + if axis in {0, "obs"}: + return (0, "obs") + if axis in {1, "var"}: + return (1, "var") + raise ValueError(f"`axis` must be either 0, 1, 'obs', or 'var', was {axis!r}") diff --git a/scanpy/get/__init__.py b/scanpy/get/__init__.py index 08567cfc6e..56c0d3c130 100644 --- a/scanpy/get/__init__.py +++ b/scanpy/get/__init__.py @@ -1,7 +1,6 @@ -# Public -# Private from __future__ import annotations +from ._aggregated import aggregate from .get import ( _check_mask, _get_obs_rep, @@ -15,6 +14,7 @@ "_check_mask", "_get_obs_rep", "_set_obs_rep", + "aggregate", "obs_df", "rank_genes_groups_df", "var_df", diff --git a/scanpy/get/_aggregated.py b/scanpy/get/_aggregated.py new file mode 100644 index 0000000000..159f7572e3 --- /dev/null +++ b/scanpy/get/_aggregated.py @@ -0,0 +1,387 @@ +from __future__ import annotations + +from functools import singledispatch +from typing import TYPE_CHECKING, Literal, Union, get_args + +import numpy as np +import pandas as pd +from anndata import AnnData, utils +from scipy import sparse + +from .._utils import _resolve_axis +from .get import _check_mask + +if TYPE_CHECKING: + from collections.abc import Collection, Iterable + + from numpy.typing import NDArray + +Array = Union[np.ndarray, sparse.csc_matrix, sparse.csr_matrix] +AggType = Literal["count_nonzero", "mean", "sum", "var"] + + +class Aggregate: + """\ + Functionality for generic grouping and aggregating. + + There is currently support for count_nonzero, sum, mean, and variance. + + **Implementation** + + Moments are computed using weighted sum aggregation of data by some feature + via multiplication by a sparse coordinate matrix A. + + Runtime is effectively computation of the product `A @ X`, i.e. the count of (non-zero) + entries in X with multiplicity the number of group memberships for that entry. + This is `O(data)` for partitions (each observation belonging to exactly one group), + independent of the number of groups. + + Params + ------ + groupby + :class:`~pandas.Categorical` containing values for grouping by. + data + Data matrix for aggregation. + mask + Mask to be used for aggregation. + """ + + def __init__( + self, + groupby: pd.Categorical, + data: Array, + *, + mask: NDArray[np.bool_] | None = None, + ) -> None: + self.groupby = groupby + self.indicator_matrix = sparse_indicator(groupby, mask=mask) + self.data = data + + groupby: pd.Categorical + indicator_matrix: sparse.coo_matrix + data: Array + + def count_nonzero(self) -> NDArray[np.integer]: + """\ + Count the number of observations in each group. + + Returns + ------- + Array of counts. + """ + # pattern = self.data._with_data(np.broadcast_to(1, len(self.data.data))) + # return self.indicator_matrix @ pattern + return self.indicator_matrix @ (self.data != 0) + + def sum(self) -> Array: + """\ + Compute the sum per feature per group of observations. + + Returns + ------- + Array of sum. + """ + return utils.asarray(self.indicator_matrix @ self.data) + + def mean(self) -> Array: + """\ + Compute the mean per feature per group of observations. + + Returns + ------- + Array of mean. + """ + return ( + utils.asarray(self.indicator_matrix @ self.data) + / np.bincount(self.groupby.codes)[:, None] + ) + + def mean_var(self, dof: int = 1) -> tuple[np.ndarray, np.ndarray]: + """\ + Compute the count, as well as mean and variance per feature, per group of observations. 
+ + The formula `Var(X) = E(X^2) - E(X)^2` suffers loss of precision when the variance is a + very small fraction of the squared mean. In particular, when X is constant, the computed variance may + nonetheless be non-zero. By default, our implementation resets the variance to exactly zero + when the computed variance, relative to the squared mean, nears the limit of precision of the + floating-point significand. + + Params + ------ + dof + Degrees of freedom for variance. + + Returns + ------- + Tuple of the mean and variance arrays. + """ + assert dof >= 0 + + group_counts = np.bincount(self.groupby.codes) + mean_ = self.mean() + # sparse matrices do not support ** for elementwise power. + mean_sq = ( + utils.asarray(self.indicator_matrix @ _power(self.data, 2)) + / group_counts[:, None] + ) + sq_mean = mean_**2 + var_ = mean_sq - sq_mean + # TODO: Why these values exactly? Because they are high relative to the datatype? + # (unchanged from original code: https://github.com/scverse/anndata/pull/564) + precision = 2 << (42 if self.data.dtype == np.float64 else 20) + # detects loss of precision in mean_sq - sq_mean, which suggests variance is 0 + var_[precision * var_ < sq_mean] = 0 + if dof != 0: + var_ *= (group_counts / (group_counts - dof))[:, np.newaxis] + return mean_, var_ + + +def _power(X: Array, power: float | int) -> Array: + """\ + Generate elementwise power of a matrix. + + Needed for non-square sparse matrices, which do not support `**`, so the `.power` method is used instead. + + Params + ------ + X + Matrix to be raised to a power. + power + Power (integer or float) to raise the matrix to. + + Returns + ------- + The matrix raised to the given power. + """ + return X**power if isinstance(X, np.ndarray) else X.power(power) + + +@singledispatch +def aggregate( + adata: AnnData, + by: str | Collection[str], + func: AggType | Iterable[AggType], + *, + axis: Literal["obs", 0, "var", 1] | None = None, + mask: NDArray[np.bool_] | str | None = None, + dof: int = 1, + layer: str | None = None, + obsm: str | None = None, + varm: str | None = None, +) -> AnnData: + """\ + Aggregate data matrix based on some categorical grouping. + + This function is useful for pseudobulking as well as plotting. + + Aggregation to perform is specified by `func`, which can be a single metric or a + list of metrics. Each metric is computed over the group and results in a new layer + in the output `AnnData` object. + + If none of `layer`, `obsm`, or `varm` are passed in, `X` will be used for aggregation data. + If `func` only has length 1 or is just an `AggType`, then aggregation data is written to `X`. + Otherwise, it is written to `layers` or `xxxm` as appropriate for the dimensions of the aggregation data. + + Params + ------ + adata + :class:`~anndata.AnnData` to be aggregated. + by + Key of the column to be grouped-by. + func + How to aggregate. + axis + Axis on which to find the group-by column. + mask + Boolean mask (or key to column containing mask) to apply along the axis. + dof + Degrees of freedom for variance. Defaults to 1. + layer + If not None, key for aggregation data. + obsm + If not None, key for aggregation data. + varm + If not None, key for aggregation data. + + Returns + ------- + Aggregated :class:`~anndata.AnnData`.
+ + Examples + -------- + + Calculating mean expression and number of nonzero entries per cluster: + + >>> import scanpy as sc, pandas as pd + >>> pbmc = sc.datasets.pbmc3k_processed().raw.to_adata() + >>> pbmc.shape + (2638, 13714) + >>> aggregated = sc.get.aggregate(pbmc, by="louvain", func=["mean", "count_nonzero"]) + >>> aggregated + AnnData object with n_obs × n_vars = 8 × 13714 + obs: 'louvain' + var: 'n_cells' + layers: 'mean', 'count_nonzero' + + We can group over multiple columns: + + >>> pbmc.obs["percent_mito_binned"] = pd.cut(pbmc.obs["percent_mito"], bins=5) + >>> sc.get.aggregate(pbmc, by=["louvain", "percent_mito_binned"], func=["mean", "count_nonzero"]) + AnnData object with n_obs × n_vars = 40 × 13714 + obs: 'louvain', 'percent_mito_binned' + var: 'n_cells' + layers: 'mean', 'count_nonzero' + + Note that this filters out any combination of groups that wasn't present in the original data. + """ + if axis is None: + axis = 1 if varm else 0 + axis, axis_name = _resolve_axis(axis) + if mask is not None: + mask = _check_mask(adata, mask, axis_name) + data = adata.X + if sum(p is not None for p in [varm, obsm, layer]) > 1: + raise TypeError("Please only provide one (or none) of varm, obsm, or layer") + + if varm is not None: + if axis != 1: + raise ValueError("varm can only be used when axis is 1") + data = adata.varm[varm] + elif obsm is not None: + if axis != 0: + raise ValueError("obsm can only be used when axis is 0") + data = adata.obsm[obsm] + elif layer is not None: + data = adata.layers[layer] + if axis == 1: + data = data.T + elif axis == 1: + # i.e., all of `varm`, `obsm`, `layers` are None so we use `X` which must be transposed + data = data.T + + dim_df = getattr(adata, axis_name) + categorical, new_label_df = _combine_categories(dim_df, by) + # Actual computation + layers = aggregate( + data, + by=categorical, + func=func, + mask=mask, + dof=dof, + ) + result = AnnData( + layers=layers, + obs=new_label_df, + var=getattr(adata, "var" if axis == 0 else "obs"), + ) + + if axis == 1: + return result.T + else: + return result + + +@aggregate.register(np.ndarray) +@aggregate.register(sparse.spmatrix) +def aggregate_array( + data, + by: pd.Categorical, + func: AggType | Iterable[AggType], + *, + mask: NDArray[np.bool_] | None = None, + dof: int = 1, +) -> dict[AggType, np.ndarray]: + groupby = Aggregate(groupby=by, data=data, mask=mask) + result = {} + + funcs = set([func] if isinstance(func, str) else func) + if unknown := funcs - set(get_args(AggType)): + raise ValueError(f"func {unknown} is not one of {get_args(AggType)}") + + if "sum" in funcs: # sum is calculated separately from the rest + agg = groupby.sum() + result["sum"] = agg + # here and below for count, if var is present, these can be calculated alongside var + if "mean" in funcs and "var" not in funcs: + agg = groupby.mean() + result["mean"] = agg + if "count_nonzero" in funcs: + result["count_nonzero"] = groupby.count_nonzero() + if "var" in funcs: + mean_, var_ = groupby.mean_var(dof) + result["var"] = var_ + if "mean" in funcs: + result["mean"] = mean_ + + return result + + +def _combine_categories( + label_df: pd.DataFrame, cols: Collection[str] | str +) -> tuple[pd.Categorical, pd.DataFrame]: + """ + Returns both the result categories and a dataframe labelling each row + """ + from itertools import product + + if isinstance(cols, str): + cols = [cols] + + df = pd.DataFrame( + {c: pd.Categorical(label_df[c]).remove_unused_categories() for c in cols}, + ) + n_categories = [len(df[c].cat.categories) for c in
cols] + + # It's like np.concatenate([x for x in product(*[range(n) for n in n_categories])]) + code_combinations = np.indices(n_categories).reshape(len(n_categories), -1) + result_categories = pd.Index( + ["_".join(map(str, x)) for x in product(*[df[c].cat.categories for c in cols])] + ) + + # Dataframe with unique combination of categories for each row + new_label_df = pd.DataFrame( + { + c: pd.Categorical.from_codes(code_combinations[i], df[c].cat.categories) + for i, c in enumerate(cols) + }, + index=result_categories, + ) + + # Calculating result codes + factors = np.ones(len(cols) + 1, dtype=np.int32) # First factor needs to be 1 + np.cumsum(n_categories[::-1], out=factors[1:]) + factors = factors[:-1][::-1] + + code_array = np.zeros((len(cols), df.shape[0]), dtype=np.int32) + for i, c in enumerate(cols): + code_array[i] = df[c].cat.codes + code_array *= factors[:, None] + + result_categorical = pd.Categorical.from_codes( + code_array.sum(axis=0), categories=result_categories + ) + + # Filter unused categories + result_categorical = result_categorical.remove_unused_categories() + new_label_df = new_label_df.loc[result_categorical.categories] + + return result_categorical, new_label_df + + +def sparse_indicator( + categorical: pd.Categorical, + *, + mask: NDArray[np.bool_] | None = None, + weight: NDArray[np.floating] | None = None, +) -> sparse.coo_matrix: + if mask is not None and weight is None: + weight = mask.astype(np.float32) + elif mask is not None and weight is not None: + weight = mask * weight + elif mask is None and weight is None: + weight = np.broadcast_to(1.0, len(categorical)) + A = sparse.coo_matrix( + (weight, (categorical.codes, np.arange(len(categorical)))), + shape=(len(categorical.categories), len(categorical)), + ) + return A diff --git a/scanpy/get/get.py b/scanpy/get/get.py index a9ca372dc0..3080dfb2bb 100644 --- a/scanpy/get/get.py +++ b/scanpy/get/get.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd from anndata import AnnData +from packaging.version import Version from scipy.sparse import spmatrix if TYPE_CHECKING: @@ -23,7 +24,7 @@ # TODO: implement diffxpy method, make singledispatch def rank_genes_groups_df( adata: AnnData, - group: str | Iterable[str], + group: str | Iterable[str] | None, *, key: str = "rank_genes_groups", pval_cutoff: float | None = None, @@ -74,7 +75,10 @@ def rank_genes_groups_df( d = [pd.DataFrame(adata.uns[key][c])[group] for c in colnames] d = pd.concat(d, axis=1, names=[None, "group"], keys=colnames) - d = d.stack(level=1).reset_index() + if Version(pd.__version__) >= Version("2.1"): + d = d.stack(level=1, future_stack=True).reset_index() + else: + d = d.stack(level=1).reset_index() d["group"] = pd.Categorical(d["group"], categories=group) d = d.sort_values(["group", "level_0"]).drop(columns="level_0") diff --git a/scanpy/neighbors/_connectivity.py b/scanpy/neighbors/_connectivity.py index f6788d33d2..adbde723f7 100644 --- a/scanpy/neighbors/_connectivity.py +++ b/scanpy/neighbors/_connectivity.py @@ -123,7 +123,7 @@ def umap( from umap.umap_ import fuzzy_simplicial_set X = coo_matrix(([], ([], [])), shape=(n_obs, 1)) - connectivities = fuzzy_simplicial_set( + connectivities, _sigmas, _rhos = fuzzy_simplicial_set( X, n_neighbors, None, @@ -134,8 +134,4 @@ def umap( local_connectivity=local_connectivity, ) - if isinstance(connectivities, tuple): - # In umap-learn 0.4, this returns (result, sigmas, rhos) - connectivities = connectivities[0] - return connectivities.tocsr() diff --git a/scanpy/plotting/_stacked_violin.py 
b/scanpy/plotting/_stacked_violin.py index 6b9abfd49c..21a9d9705b 100644 --- a/scanpy/plotting/_stacked_violin.py +++ b/scanpy/plotting/_stacked_violin.py @@ -7,6 +7,7 @@ import pandas as pd from matplotlib import pyplot as plt from matplotlib.colors import Normalize, is_color_like +from packaging.version import Version from .. import logging as logg from .._compat import old_positionals @@ -468,9 +469,13 @@ def _make_rows_of_violinplots( # the expression value # This format is convenient to aggregate per gene or per category # while making the violin plots. + if Version(pd.__version__) >= Version("2.1"): + stack_kwargs = {"future_stack": True} + else: + stack_kwargs = {"dropna": False} df = ( - pd.DataFrame(_matrix.stack(dropna=False)) + pd.DataFrame(_matrix.stack(**stack_kwargs)) .reset_index() .rename( columns={ diff --git a/scanpy/preprocessing/_docs.py b/scanpy/preprocessing/_docs.py index b46a5d48ff..3c13e84afb 100644 --- a/scanpy/preprocessing/_docs.py +++ b/scanpy/preprocessing/_docs.py @@ -26,7 +26,7 @@ By default uses them if they have been determined beforehand. .. deprecated:: 1.10.0 - Use `mask` instead + Use `mask_var` instead """ doc_obs_qc_args = """\ diff --git a/scanpy/testing/_doctests.py b/scanpy/testing/_doctests.py index 9be5b3f227..6a339ab7f8 100644 --- a/scanpy/testing/_doctests.py +++ b/scanpy/testing/_doctests.py @@ -9,22 +9,8 @@ def doctest_needs(mod: str) -> Callable[[F], F]: """Mark function with doctest dependency.""" - try: - from ._pytest.marks import needs - except ImportError: - mark = None - else: - try: - mark = needs[mod] - except KeyError: - raise KeyError( - f"Unknown dependency {mod}. If it isn’t a typo, " - "please add it to `needs` enum in `scanpy.testing._pytests.marks`." - ) from None - def decorator(func: F) -> F: - if mark is not None: - func._doctest_needs = mark + func._doctest_needs = mod return func return decorator diff --git a/scanpy/testing/_pytest/__init__.py b/scanpy/testing/_pytest/__init__.py index ad42b8ec74..e864d31d3d 100644 --- a/scanpy/testing/_pytest/__init__.py +++ b/scanpy/testing/_pytest/__init__.py @@ -10,12 +10,11 @@ from ..._utils import _import_name from .fixtures import * # noqa: F403 +from .marks import needs if TYPE_CHECKING: from collections.abc import Generator, Iterable - from .marks import needs - # Defining it here because it’s autouse. 
@pytest.fixture(autouse=True) @@ -113,9 +112,9 @@ def _modify_doctests(request: pytest.FixtureRequest) -> None: request.getfixturevalue("_doctest_env") func = _import_name(request.node.name) - needs_marker: needs | None - if needs_marker := getattr(func, "_doctest_needs", None): - assert needs_marker.mark.name == "skipif" + needs_mod: str | None + if needs_mod := getattr(func, "_doctest_needs", None): + needs_marker = needs[needs_mod] if needs_marker.mark.args[0]: pytest.skip(reason=needs_marker.mark.kwargs["reason"]) skip_reason: str | None diff --git a/scanpy/testing/_pytest/marks.py b/scanpy/testing/_pytest/marks.py index e6fa85f936..dd6fda5db0 100644 --- a/scanpy/testing/_pytest/marks.py +++ b/scanpy/testing/_pytest/marks.py @@ -57,4 +57,4 @@ def __init__(self, mod: str) -> None: if self._name_.casefold() != mod.casefold().replace("-", "_"): reason = f"{reason} (`pip install {mod}`)" dec = pytest.mark.skipif(not find_spec(self._name_), reason=reason) - super().__init__(dec.mark) + super().__init__(dec.mark, _ispytest=True) diff --git a/scanpy/tests/_images/heatmap_var_as_dict/expected.png b/scanpy/tests/_images/heatmap_var_as_dict/expected.png index 9406010120..aed510396f 100644 Binary files a/scanpy/tests/_images/heatmap_var_as_dict/expected.png and b/scanpy/tests/_images/heatmap_var_as_dict/expected.png differ diff --git a/scanpy/tests/conftest.py b/scanpy/tests/conftest.py index a3dcf69d75..5c96484f4e 100644 --- a/scanpy/tests/conftest.py +++ b/scanpy/tests/conftest.py @@ -7,6 +7,8 @@ import pytest +pytest_plugins = ["scanpy.testing._pytest"] + # just import for the IMPORTED check import scanpy as _sc # noqa: F401 diff --git a/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_1/expected.png b/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_1/expected.png index d14bbab718..093cc370aa 100644 Binary files a/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_1/expected.png and b/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_1/expected.png differ diff --git a/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_2/expected.png b/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_2/expected.png index a7d4ea57b5..f4ca8d06f0 100644 Binary files a/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_2/expected.png and b/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_2/expected.png differ diff --git a/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_3/expected.png b/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_3/expected.png index e33a4e2d44..89d455fa3c 100644 Binary files a/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_3/expected.png and b/scanpy/tests/notebooks/_images_pbmc3k/rank_genes_groups_3/expected.png differ diff --git a/scanpy/tests/notebooks/_images_pbmc3k/scatter_3/expected.png b/scanpy/tests/notebooks/_images_pbmc3k/scatter_3/expected.png index 41bae501b5..2be9e84fd1 100644 Binary files a/scanpy/tests/notebooks/_images_pbmc3k/scatter_3/expected.png and b/scanpy/tests/notebooks/_images_pbmc3k/scatter_3/expected.png differ diff --git a/scanpy/tests/notebooks/_images_pbmc3k/violin_2/expected.png b/scanpy/tests/notebooks/_images_pbmc3k/violin_2/expected.png index 4f2bf2fbf2..a9c29f6c24 100644 Binary files a/scanpy/tests/notebooks/_images_pbmc3k/violin_2/expected.png and b/scanpy/tests/notebooks/_images_pbmc3k/violin_2/expected.png differ diff --git a/scanpy/tests/notebooks/test_pbmc3k.py b/scanpy/tests/notebooks/test_pbmc3k.py index 5e2c71e617..ad883d1403 100644 --- a/scanpy/tests/notebooks/test_pbmc3k.py +++ 
b/scanpy/tests/notebooks/test_pbmc3k.py @@ -28,11 +28,10 @@ @needs.leidenalg def test_pbmc3k(image_comparer): + # ensure violin plots and other non-deterministic plots have deterministic behavior + np.random.seed(0) save_and_compare_images = partial(image_comparer, ROOT, tol=20) - - adata = sc.read( - "./data/pbmc3k_raw.h5ad", backup_url="https://falexwolf.de/data/pbmc3k_raw.h5ad" - ) + adata = sc.datasets.pbmc3k() # Preprocessing @@ -105,13 +104,48 @@ def test_pbmc3k(image_comparer): # Clustering the graph - sc.tl.leiden(adata, resolution=0.9) - # sc.pl.umap(adata, color=['leiden', 'CST3', 'NKG7'], show=False) - # save_and_compare_images('umap_2') + sc.tl.leiden( + adata, + resolution=0.9, + random_state=0, + directed=False, + n_iterations=2, + flavor="igraph", + ) + + # sc.pl.umap(adata, color=["leiden", "CST3", "NKG7"], show=False) + # save_and_compare_images("umap_2") sc.pl.scatter(adata, "CST3", "NKG7", color="leiden", show=False) save_and_compare_images("scatter_3") # Finding marker genes + # Due to inconsistency between our test runner and local runs, these clusters need to + # be pre-annotated as the numbers for each cluster are not consistent. + marker_genes = [ + "RP11-18H21.1", + "GZMK", + "CD79A", + "FCGR3A", + "GNLY", + "S100A8", + "FCER1A", + "PPBP", + ] + new_labels = ["0", "1", "2", "3", "4", "5", "6", "7"] + data_df = adata[:, marker_genes].to_df() + data_df["leiden"] = adata.obs["leiden"] + max_idxs = data_df.groupby("leiden", observed=True).mean().idxmax() + leiden_relabel = {} + for marker_gene, new_label in zip(marker_genes, new_labels): + leiden_relabel[max_idxs[marker_gene]] = new_label + adata.obs["leiden_old"] = adata.obs["leiden"].copy() + adata.rename_categories( + "leiden", [leiden_relabel[key] for key in sorted(leiden_relabel.keys())] + ) + # ensure that the column can be sorted for consistent plotting since it is by default unordered + adata.obs["leiden"] = adata.obs["leiden"].cat.reorder_categories( + list(map(str, range(len(adata.obs["leiden"].cat.categories)))), ordered=True + ) sc.tl.rank_genes_groups(adata, "leiden") sc.pl.rank_genes_groups(adata, n_genes=20, sharey=False, show=False) @@ -129,18 +163,13 @@ def test_pbmc3k(image_comparer): # sc.pl.rank_genes_groups_violin(adata, groups='0', n_genes=8) # save_and_compare_images('rank_genes_groups_4') - if adata[adata.obs["leiden"] == "4", "CST3"].X.mean() < 1: - ( # switch clusters - adata.obs["leiden"][adata.obs["leiden"] == "4"], - adata.obs["leiden"][adata.obs["leiden"] == "5"], - ) = ("5", "4") new_cluster_names = [ "CD4 T cells", - "CD14+ Monocytes", - "B cells", "CD8 T cells", + "B cells", "NK cells", "FCGR3A+ Monocytes", + "CD14+ Monocytes", "Dendritic cells", "Megakaryocytes", ] @@ -148,7 +177,6 @@ def test_pbmc3k(image_comparer): # sc.pl.umap(adata, color='leiden', legend_loc='on data', title='', frameon=False, show=False) # save_and_compare_images('umap_3') - sc.pl.violin( adata, ["CST3", "NKG7", "PPBP"], groupby="leiden", rotation=90, show=False ) diff --git a/scanpy/tests/test_aggregated.py b/scanpy/tests/test_aggregated.py new file mode 100644 index 0000000000..0b3e14b163 --- /dev/null +++ b/scanpy/tests/test_aggregated.py @@ -0,0 +1,380 @@ +from __future__ import annotations + +import anndata as ad +import numpy as np +import pandas as pd +import pytest +from packaging.version import Version +from scipy.sparse import csr_matrix + +import scanpy as sc +from scanpy._utils import _resolve_axis +from scanpy.testing._helpers import assert_equal +from scanpy.testing._helpers.data import pbmc3k_processed +from
scanpy.testing._pytest.params import ARRAY_TYPES_MEM + + +@pytest.fixture +def df_base(): + ax_base = ["A", "B"] + return pd.DataFrame(index=ax_base) + + +@pytest.fixture +def df_groupby(): + ax_groupby = [ + *["v0", "v1", "v2"], + *["w0", "w1"], + *["a1", "a2", "a3"], + *["b1", "b2"], + *["c1", "c2"], + "d0", + ] + + df_groupby = pd.DataFrame(index=pd.Index(ax_groupby, name="cell")) + df_groupby["key"] = pd.Categorical([c[0] for c in ax_groupby]) + df_groupby["key_superset"] = pd.Categorical([c[0] for c in ax_groupby]).map( + {"v": "v", "w": "v", "a": "a", "b": "a", "c": "a", "d": "a"} + ) + df_groupby["key_subset"] = pd.Categorical([c[1] for c in ax_groupby]) + df_groupby["weight"] = 2.0 + return df_groupby + + +@pytest.fixture +def X(): + data = [ + *[[0, -2], [1, 13], [2, 1]], # v + *[[3, 12], [4, 2]], # w + *[[5, 11], [6, 3], [7, 10]], # a + *[[8, 4], [9, 9]], # b + *[[10, 5], [11, 8]], # c + [12, 6], # d + ] + return np.array(data, dtype=np.float32) + + +def gen_adata(data_key, dim, df_base, df_groupby, X): + if (data_key == "varm" and dim == "obs") or (data_key == "obsm" and dim == "var"): + pytest.skip("invalid parameter combination") + + obs_df, var_df = (df_groupby, df_base) if dim == "obs" else (df_base, df_groupby) + data = X.T if dim == "var" and data_key != "varm" else X + if data_key != "X": + data_dict_sparse = {data_key: {"test": csr_matrix(data)}} + data_dict_dense = {data_key: {"test": data}} + else: + data_dict_sparse = {data_key: csr_matrix(data)} + data_dict_dense = {data_key: data} + + adata_sparse = ad.AnnData(obs=obs_df, var=var_df, **data_dict_sparse) + adata_dense = ad.AnnData(obs=obs_df, var=var_df, **data_dict_dense) + return adata_sparse, adata_dense + + +@pytest.mark.parametrize("axis", [0, 1]) +def test_mask(axis): + blobs = sc.datasets.blobs() + mask = blobs.obs["blobs"] == 0 + blobs.obs["mask_col"] = mask + if axis == 1: + blobs = blobs.T + by_name = sc.get.aggregate(blobs, "blobs", "sum", axis=axis, mask="mask_col") + by_value = sc.get.aggregate(blobs, "blobs", "sum", axis=axis, mask=mask) + + assert_equal(by_name, by_value) + + assert np.all(by_name["0"].layers["sum"] == 0) + + +@pytest.mark.parametrize("array_type", ARRAY_TYPES_MEM) +@pytest.mark.parametrize("metric", ["sum", "mean", "var", "count_nonzero"]) +def test_aggregate_vs_pandas(metric, array_type): + adata = pbmc3k_processed().raw.to_adata() + adata = adata[ + adata.obs["louvain"].isin(adata.obs["louvain"].cat.categories[:5]), :1_000 + ].copy() + adata.X = array_type(adata.X) + adata.obs["percent_mito_binned"] = pd.cut(adata.obs["percent_mito"], bins=5) + result = sc.get.aggregate(adata, ["louvain", "percent_mito_binned"], metric) + + if metric == "count_nonzero": + expected = ( + (adata.to_df() != 0) + .astype(np.float64) + .join(adata.obs[["louvain", "percent_mito_binned"]]) + .groupby(["louvain", "percent_mito_binned"], observed=True) + .agg("sum") + ) + else: + expected = ( + adata.to_df() + .astype(np.float64) + .join(adata.obs[["louvain", "percent_mito_binned"]]) + .groupby(["louvain", "percent_mito_binned"], observed=True) + .agg(metric) + ) + expected.index = expected.index.to_frame().apply( + lambda x: "_".join(map(str, x)), axis=1 + ) + expected.index.name = None + expected.columns.name = None + + result_df = result.to_df(layer=metric) + result_df.index.name = None + result_df.columns.name = None + + if Version(pd.__version__) < Version("2"): + # Order of results returned by groupby changed in pandas 2 + assert expected.shape == result_df.shape + assert 
expected.index.isin(result_df.index).all() + + expected = expected.loc[result_df.index] + + pd.testing.assert_frame_equal(result_df, expected, check_dtype=False, atol=1e-5) + + +@pytest.mark.parametrize("array_type", ARRAY_TYPES_MEM) +@pytest.mark.parametrize("metric", ["sum", "mean", "var", "count_nonzero"]) +def test_aggregate_axis(array_type, metric): + adata = pbmc3k_processed().raw.to_adata() + adata = adata[ + adata.obs["louvain"].isin(adata.obs["louvain"].cat.categories[:5]), :1_000 + ].copy() + adata.X = array_type(adata.X) + expected = sc.get.aggregate(adata, ["louvain"], metric) + actual = sc.get.aggregate(adata.T, ["louvain"], metric, axis=1).T + + assert_equal(expected, actual) + + +def test_aggregate_entry(): + args = ("blobs", ["mean", "var", "count_nonzero"]) + + adata = sc.datasets.blobs() + X_result = sc.get.aggregate(adata, *args) + # layer adata + layer_adata = ad.AnnData( + obs=adata.obs, + var=adata.var, + layers={"test": adata.X.copy()}, + ) + layer_result = sc.get.aggregate(layer_adata, *args, layer="test") + obsm_adata = ad.AnnData( + obs=adata.obs, + var=adata.var, + obsm={"test": adata.X.copy()}, + ) + obsm_result = sc.get.aggregate(obsm_adata, *args, obsm="test") + varm_adata = ad.AnnData( + obs=adata.var, + var=adata.obs, + varm={"test": adata.X.copy()}, + ) + varm_result = sc.get.aggregate(varm_adata, *args, varm="test") + + X_result_min = X_result.copy() + del X_result_min.var + X_result_min.var_names = [str(x) for x in np.arange(X_result_min.n_vars)] + + assert_equal(X_result, layer_result) + assert_equal(X_result_min, obsm_result) + assert_equal(X_result.layers, obsm_result.layers) + assert_equal(X_result.layers, varm_result.T.layers) + + +def test_aggregate_incorrect_dim(): + adata = pbmc3k_processed().raw.to_adata() + + with pytest.raises(ValueError, match="was 'foo'"): + sc.get.aggregate(adata, ["louvain"], "sum", axis="foo") + + +@pytest.mark.parametrize("axis_name", ["obs", "var"]) +def test_aggregate_axis_specification(axis_name): + axis, axis_name = _resolve_axis(axis_name) + by = "blobs" if axis == 0 else "labels" + + adata = sc.datasets.blobs() + adata.var["labels"] = np.tile(["a", "b"], adata.shape[1])[: adata.shape[1]] + + agg_index = sc.get.aggregate(adata, by=by, func="mean", axis=axis) + agg_name = sc.get.aggregate(adata, by=by, func="mean", axis=axis_name) + + np.testing.assert_equal(agg_index.layers["mean"], agg_name.layers["mean"]) + + if axis_name == "obs": + agg_unspecified = sc.get.aggregate(adata, by=by, func="mean") + np.testing.assert_equal(agg_name.layers["mean"], agg_unspecified.layers["mean"]) + + +@pytest.mark.parametrize( + ("matrix", "df", "keys", "metrics", "expected"), + [ + pytest.param( + np.block( + [ + [np.ones((2, 2)), np.zeros((2, 2))], + [np.zeros((2, 2)), np.ones((2, 2))], + ] + ), + pd.DataFrame( + { + "a": ["a", "a", "b", "b"], + "b": ["c", "d", "d", "d"], + } + ), + ["a", "b"], + ["count_nonzero"], # , "sum", "mean"], + ad.AnnData( + obs=pd.DataFrame( + {"a": ["a", "a", "b"], "b": ["c", "d", "d"]}, + index=["a_c", "a_d", "b_d"], + ).astype("category"), + var=pd.DataFrame(index=[f"gene_{i}" for i in range(4)]), + layers={ + "count_nonzero": np.array( + [[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 2, 2]] + ), + # "sum": np.array([[2, 0], [0, 2]]), + # "mean": np.array([[1, 0], [0, 1]]), + }, + ), + id="count_nonzero", + ), + pytest.param( + np.block( + [ + [np.ones((2, 2)), np.zeros((2, 2))], + [np.zeros((2, 2)), np.ones((2, 2))], + ] + ), + pd.DataFrame( + { + "a": ["a", "a", "b", "b"], + "b": ["c", "d", "d", "d"], + } + ), + 
["a", "b"], + ["sum", "mean", "count_nonzero"], + ad.AnnData( + obs=pd.DataFrame( + {"a": ["a", "a", "b"], "b": ["c", "d", "d"]}, + index=["a_c", "a_d", "b_d"], + ).astype("category"), + var=pd.DataFrame(index=[f"gene_{i}" for i in range(4)]), + layers={ + "sum": np.array([[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 2, 2]]), + "mean": np.array([[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 1, 1]]), + "count_nonzero": np.array( + [[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 2, 2]] + ), + }, + ), + id="sum-mean-count_nonzero", + ), + pytest.param( + np.block( + [ + [np.ones((2, 2)), np.zeros((2, 2))], + [np.zeros((2, 2)), np.ones((2, 2))], + ] + ), + pd.DataFrame( + { + "a": ["a", "a", "b", "b"], + "b": ["c", "d", "d", "d"], + } + ), + ["a", "b"], + ["mean"], + ad.AnnData( + obs=pd.DataFrame( + {"a": ["a", "a", "b"], "b": ["c", "d", "d"]}, + index=["a_c", "a_d", "b_d"], + ).astype("category"), + var=pd.DataFrame(index=[f"gene_{i}" for i in range(4)]), + layers={ + "mean": np.array([[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 1, 1]]), + }, + ), + id="mean", + ), + ], +) +def test_aggregate_examples(matrix, df, keys, metrics, expected): + adata = ad.AnnData( + X=matrix, + obs=df, + var=pd.DataFrame(index=[f"gene_{i}" for i in range(matrix.shape[1])]), + ) + result = sc.get.aggregate(adata, by=keys, func=metrics) + + print(result) + print(expected) + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("label_cols", "cols", "expected"), + [ + pytest.param( + dict( + a=pd.Categorical(["a", "b", "c"]), + b=pd.Categorical(["d", "d", "f"]), + ), + ["a", "b"], + pd.Categorical(["a_d", "b_d", "c_f"]), + id="two_of_two", + ), + pytest.param( + dict( + a=pd.Categorical(["a", "b", "c"]), + b=pd.Categorical(["d", "d", "f"]), + c=pd.Categorical(["g", "h", "h"]), + ), + ["a", "b", "c"], + pd.Categorical(["a_d_g", "b_d_h", "c_f_h"]), + id="three_of_three", + ), + pytest.param( + dict( + a=pd.Categorical(["a", "b", "c"]), + b=pd.Categorical(["d", "d", "f"]), + c=pd.Categorical(["g", "h", "h"]), + ), + ["a", "c"], + pd.Categorical(["a_g", "b_h", "c_h"]), + id="two_of_three-1", + ), + pytest.param( + dict( + a=pd.Categorical(["a", "b", "c"]), + b=pd.Categorical(["d", "d", "f"]), + c=pd.Categorical(["g", "h", "h"]), + ), + ["b", "c"], + pd.Categorical(["d_g", "d_h", "f_h"]), + id="two_of_three-2", + ), + ], +) +def test_combine_categories(label_cols, cols, expected): + from scanpy.get._aggregated import _combine_categories + + label_df = pd.DataFrame(label_cols) + result, result_label_df = _combine_categories(label_df, cols) + + assert isinstance(result, pd.Categorical) + + pd.testing.assert_extension_array_equal(result, expected) + + pd.testing.assert_index_equal( + pd.Index(result), result_label_df.index.astype("category") + ) + + reconstructed_df = pd.DataFrame( + [x.split("_") for x in result], columns=cols, index=result.astype(str) + ).astype("category") + pd.testing.assert_frame_equal(reconstructed_df, result_label_df) diff --git a/scanpy/tests/test_clustering.py b/scanpy/tests/test_clustering.py index f42a4a4b2a..d2005d9659 100644 --- a/scanpy/tests/test_clustering.py +++ b/scanpy/tests/test_clustering.py @@ -1,6 +1,7 @@ from __future__ import annotations import pytest +from sklearn.metrics.cluster import normalized_mutual_info_score import scanpy as sc from scanpy.testing._helpers.data import pbmc68k_reduced @@ -12,11 +13,136 @@ def adata_neighbors(): return pbmc68k_reduced() +FLAVORS = [ + pytest.param("igraph", marks=needs.igraph), + pytest.param("leidenalg", marks=needs.leidenalg), +] + + +@needs.leidenalg +@needs.igraph 
+@pytest.mark.parametrize("flavor", FLAVORS) +@pytest.mark.parametrize("resolution", [1, 2]) +@pytest.mark.parametrize("n_iterations", [-1, 3]) +def test_leiden_basic(adata_neighbors, flavor, resolution, n_iterations): + sc.tl.leiden( + adata_neighbors, + flavor=flavor, + resolution=resolution, + n_iterations=n_iterations, + directed=(flavor == "leidenalg"), + ) + assert adata_neighbors.uns["leiden"]["params"]["resolution"] == resolution + assert adata_neighbors.uns["leiden"]["params"]["n_iterations"] == n_iterations + + @needs.leidenalg -def test_leiden_basic(adata_neighbors): - sc.tl.leiden(adata_neighbors) +@needs.igraph +@pytest.mark.parametrize("flavor", FLAVORS) +def test_leiden_random_state(adata_neighbors, flavor): + is_leiden_alg = flavor == "leidenalg" + n_iterations = 2 if is_leiden_alg else -1 + adata_1 = sc.tl.leiden( + adata_neighbors, + flavor=flavor, + random_state=1, + copy=True, + directed=is_leiden_alg, + n_iterations=n_iterations, + ) + adata_1_again = sc.tl.leiden( + adata_neighbors, + flavor=flavor, + random_state=1, + copy=True, + directed=is_leiden_alg, + n_iterations=n_iterations, + ) + adata_2 = sc.tl.leiden( + adata_neighbors, + flavor=flavor, + random_state=2, + copy=True, + directed=is_leiden_alg, + n_iterations=n_iterations, + ) + assert (adata_1.obs["leiden"] == adata_1_again.obs["leiden"]).all() + assert (adata_2.obs["leiden"] != adata_1_again.obs["leiden"]).any() + + +@needs.igraph +def test_leiden_igraph_directed(adata_neighbors): + with pytest.raises(ValueError): + sc.tl.leiden(adata_neighbors, flavor="igraph", directed=True) + + +@needs.igraph +def test_leiden_wrong_flavor(adata_neighbors): + with pytest.raises(ValueError): + sc.tl.leiden(adata_neighbors, flavor="foo") + + +@needs.igraph +@needs.leidenalg +def test_leiden_igraph_partition_type(adata_neighbors): + import leidenalg + + with pytest.raises(ValueError): + sc.tl.leiden( + adata_neighbors, + flavor="igraph", + partition_type=leidenalg.RBConfigurationVertexPartition, + ) + + +@needs.leidenalg +@needs.igraph +def test_leiden_equal_defaults_same_args(adata_neighbors): + """Ensure the two implementations are the same for the same args.""" + leiden_alg_clustered = sc.tl.leiden( + adata_neighbors, flavor="leidenalg", copy=True, n_iterations=2 + ) + igraph_clustered = sc.tl.leiden( + adata_neighbors, flavor="igraph", copy=True, directed=False, n_iterations=2 + ) + assert ( + normalized_mutual_info_score( + leiden_alg_clustered.obs["leiden"], igraph_clustered.obs["leiden"] + ) + > 0.9 + ) + + +@needs.leidenalg +@needs.igraph +def test_leiden_equal_defaults(adata_neighbors): + """Ensure that the old leidenalg defaults are close enough to the current default outputs.""" + leiden_alg_clustered = sc.tl.leiden( + adata_neighbors, flavor="leidenalg", directed=True, copy=True + ) + igraph_clustered = sc.tl.leiden( + adata_neighbors, copy=True, n_iterations=2, directed=False + ) + assert ( + normalized_mutual_info_score( + leiden_alg_clustered.obs["leiden"], igraph_clustered.obs["leiden"] + ) + > 0.9 + ) + + +@needs.igraph +def test_leiden_objective_function(adata_neighbors): + """Ensure that popping this as a `clustering_kwargs` and using it does not error out.""" + sc.tl.leiden( + adata_neighbors, + objective_function="modularity", + flavor="igraph", + directed=False, + ) +@needs.igraph @pytest.mark.parametrize( "clustering,key", [ @@ -52,6 +178,7 @@ def test_clustering_subset(adata_neighbors, clustering, key): @needs.louvain +@needs.igraph def test_louvain_basic(adata_neighbors): 
sc.tl.louvain(adata_neighbors) sc.tl.louvain(adata_neighbors, use_weights=True) @@ -60,6 +187,7 @@ def test_louvain_basic(adata_neighbors): @needs.louvain +@needs.igraph def test_partition_type(adata_neighbors): import louvain diff --git a/scanpy/tests/test_pca.py b/scanpy/tests/test_pca.py index a18a313063..3150808ee1 100644 --- a/scanpy/tests/test_pca.py +++ b/scanpy/tests/test_pca.py @@ -165,7 +165,7 @@ def test_pca_transform(array_type): with warnings.catch_warnings(record=True) as record: sc.pp.pca(adata, n_comps=4, zero_center=True, dtype="float64") - assert len(record) == 0 + assert len(record) == 0, record assert np.linalg.norm(A_pca_abs[:, :4] - np.abs(adata.obsm["X_pca"])) < 2e-05 @@ -382,7 +382,7 @@ def test_mask_order_warning(request): UserWarning, match="When using a mask parameter with anndata<0.9 on a dense array", ): - sc.pp.pca(adata, mask=mask) + sc.pp.pca(adata, mask_var=mask) def test_mask_defaults(array_type, float_dtype): diff --git a/scanpy/tests/test_plotting.py b/scanpy/tests/test_plotting.py index 0a03fdd8ba..5482c0d72f 100644 --- a/scanpy/tests/test_plotting.py +++ b/scanpy/tests/test_plotting.py @@ -103,7 +103,14 @@ def test_heatmap(image_comparer): # test var_names as dict pbmc = pbmc68k_reduced() - sc.tl.leiden(pbmc, key_added="clusters", resolution=0.5) + sc.tl.leiden( + pbmc, + key_added="clusters", + resolution=0.5, + flavor="igraph", + n_iterations=2, + directed=False, + ) # call umap to trigger colors for the clusters sc.pl.umap(pbmc, color="clusters") marker_genes_dict = { diff --git a/scanpy/tests/test_score_genes.py b/scanpy/tests/test_score_genes.py index cb44e46dc9..23bd934e78 100644 --- a/scanpy/tests/test_score_genes.py +++ b/scanpy/tests/test_score_genes.py @@ -66,8 +66,8 @@ def test_score_with_reference(): sc.tl.score_genes(adata, gene_list=adata.var_names[:100], score_name="Test") with Path(HERE, "score_genes_reference_paul2015.pkl").open("rb") as file: reference = pickle.load(file) - # np.testing.assert_allclose(reference, adata.obs.Test.values) - np.testing.assert_array_equal(reference, adata.obs.Test.values) + # np.testing.assert_allclose(reference, adata.obs["Test"].to_numpy()) + np.testing.assert_array_equal(reference, adata.obs["Test"].to_numpy()) def test_add_score(): diff --git a/scanpy/tools/_ingest.py b/scanpy/tools/_ingest.py index 4c8528180b..fb36b7a913 100644 --- a/scanpy/tools/_ingest.py +++ b/scanpy/tools/_ingest.py @@ -222,12 +222,9 @@ class Ingest: """ def _init_umap(self, adata): - import umap as u + from umap import UMAP - if not self._use_pynndescent: - u.umap_._HAVE_PYNNDESCENT = False - - self._umap = u.UMAP( + self._umap = UMAP( metric=self._metric, random_state=adata.uns["umap"]["params"].get("random_state", 0), ) @@ -246,79 +243,16 @@ def _init_umap(self, adata): self._umap._n_neighbors = self._n_neighbors self._umap.n_neighbors = self._n_neighbors - if not self._use_pynndescent: - if self._random_init is not None or self._tree_init is not None: - self._umap._random_init = self._random_init - self._umap._tree_init = self._tree_init - self._umap._search = self._search - - if self._dist_func is not None: - self._umap._input_distance_func = self._dist_func - - self._umap._rp_forest = self._rp_forest - - self._umap._search_graph = self._search_graph - else: - self._umap._knn_search_index = self._nnd_idx + self._umap._knn_search_index = self._nnd_idx self._umap._a = adata.uns["umap"]["params"]["a"] self._umap._b = adata.uns["umap"]["params"]["b"] self._umap._input_hash = None - def _init_dist_search(self, dist_args): - from 
functools import partial - - from umap.distances import named_distances - from umap.nndescent import initialise_search - - self._random_init = None - self._tree_init = None - - self._initialise_search = None - self._search = None - - self._dist_func = None - - dist_func = named_distances[self._metric] - - if pkg_version("umap-learn") < version.parse("0.4.0"): - from umap.nndescent import ( - make_initialisations, - make_initialized_nnd_search, - ) - - self._random_init, self._tree_init = make_initialisations( - dist_func, dist_args - ) - _initialise_search = partial( - initialise_search, - init_from_random=self._random_init, - init_from_tree=self._tree_init, - ) - _search = make_initialized_nnd_search(dist_func, dist_args) - - else: - from numba import njit - from umap.nndescent import initialized_nnd_search - - @njit - def partial_dist_func(x, y): - return dist_func(x, y, *dist_args) - - _initialise_search = partial(initialise_search, dist=partial_dist_func) - _search = partial(initialized_nnd_search, dist=partial_dist_func) - - self._dist_func = partial_dist_func - - self._initialise_search = _initialise_search - self._search = _search - def _init_pynndescent(self, distances): from pynndescent import NNDescent - self._use_pynndescent = True - first_col = np.arange(distances.shape[0])[:, None] init_indices = np.hstack((first_col, np.stack(distances.tolil().rows))) @@ -365,27 +299,13 @@ def _init_neighbors(self, adata, neighbors_key): if "metric_kwds" in neighbors["params"]: self._metric_kwds = neighbors["params"]["metric_kwds"] - dist_args = tuple(self._metric_kwds.values()) else: self._metric_kwds = {} - dist_args = () self._metric = neighbors["params"]["metric"] - if pkg_version("umap-learn") < version.parse("0.5.0"): - self._init_dist_search(dist_args) - - search_graph = neighbors["distances"].copy() - search_graph.data = (search_graph.data > 0).astype(np.int8) - self._search_graph = search_graph.maximum(search_graph.transpose()) - - if "rp_forest" in neighbors: - self._rp_forest = _rp_forest_generate(neighbors["rp_forest"]) - else: - self._rp_forest = None - else: - self._neigh_random_state = neighbors["params"].get("random_state", 0) - self._init_pynndescent(neighbors["distances"]) + self._neigh_random_state = neighbors["params"].get("random_state", 0) + self._init_pynndescent(neighbors["distances"]) def _init_pca(self, adata): self._pca_centered = adata.uns["pca"]["params"]["zero_center"] @@ -411,9 +331,6 @@ def __init__(self, adata: AnnData, neighbors_key: str | None = None): self._adata_ref = adata self._adata_new = None - # only use with umap > 0.5.0 - self._use_pynndescent = False - if "pca" in adata.uns: self._init_pca(adata) @@ -498,29 +415,13 @@ def neighbors(self, k=None, queue_size=5, epsilon=0.1, random_state=0): random_state = check_random_state(random_state) rng_state = random_state.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64) - train = self._rep test = self._obsm["rep"] if k is None: k = self._n_neighbors - if self._use_pynndescent: - self._nnd_idx.search_rng_state = rng_state - - self._indices, self._distances = self._nnd_idx.query(test, k, epsilon) - - else: - from umap.utils import deheap_sort - - init = self._initialise_search( - self._rp_forest, train, test, int(k * queue_size), rng_state=rng_state - ) - - result = self._search( - train, self._search_graph.indptr, self._search_graph.indices, init, test - ) - indices, dists = deheap_sort(result) - self._indices, self._distances = indices[:, :k], dists[:, :k] + self._nnd_idx.search_rng_state = rng_state + 
self._indices, self._distances = self._nnd_idx.query(test, k, epsilon) def _umap_transform(self): return self._umap.transform(self._obsm["rep"]) diff --git a/scanpy/tools/_leiden.py b/scanpy/tools/_leiden.py index aa8a9f4981..5345e3567b 100644 --- a/scanpy/tools/_leiden.py +++ b/scanpy/tools/_leiden.py @@ -1,6 +1,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING +import importlib +import warnings +from typing import TYPE_CHECKING, Literal import numpy as np import pandas as pd @@ -34,14 +36,15 @@ def leiden( random_state: _utils.AnyRandom = 0, key_added: str = "leiden", adjacency: sparse.spmatrix | None = None, - directed: bool = True, + directed: bool | None = None, use_weights: bool = True, n_iterations: int = -1, partition_type: type[MutableVertexPartition] | None = None, neighbors_key: str | None = None, obsp: str | None = None, copy: bool = False, - **partition_kwargs, + flavor: Literal["leidenalg", "igraph"] = "leidenalg", + **clustering_args, ) -> AnnData | None: """\ Cluster cells into subgroups [Traag18]_. @@ -80,6 +83,7 @@ def leiden( How many iterations of the Leiden clustering algorithm to perform. Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering. + 2 is faster and the default for underlying packages. partition_type Type of partition to use. Defaults to :class:`~leidenalg.RBConfigurationVertexPartition`. @@ -96,9 +100,11 @@ def leiden( `obsp` and `neighbors_key` at the same time. copy Whether to copy `adata` or modify it inplace. - **partition_kwargs - Any further arguments to pass to `~leidenalg.find_partition` - (which in turn passes arguments to the `partition_type`). + flavor + Which package's implementation to use. + **clustering_args + Any further arguments to pass to :func:`~leidenalg.find_partition` (which in turn passes arguments to the `partition_type`) + or :meth:`igraph.Graph.community_leiden` from `igraph`. Returns ------- @@ -112,13 +118,35 @@ def leiden( A dict with the values for the parameters `resolution`, `random_state`, and `n_iterations`. """ - try: - import leidenalg - except ImportError: + if flavor not in {"igraph", "leidenalg"}: + raise ValueError( + f"flavor must be either 'igraph' or 'leidenalg', but '{flavor}' was passed" + ) + igraph_spec = importlib.util.find_spec("igraph") + if igraph_spec is None: raise ImportError( - "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`." + "Please install the igraph package: `conda install -c conda-forge igraph` or `pip3 install igraph`." ) - partition_kwargs = dict(partition_kwargs) + if flavor == "igraph": + if directed: + raise ValueError( + "Cannot use igraph's leiden implementation with a directed graph." + ) + if partition_type is not None: + raise ValueError( + "Do not pass in partition_type argument when using igraph." + ) + else: + try: + import leidenalg + + msg = 'Use of leidenalg is discouraged and will be deprecated in the future. Please use `flavor="igraph"` and `n_iterations=2` to achieve similar results. `directed` must also be `False` to work with `igraph`\'s implementation.' + warnings.warn(msg, FutureWarning) + except ImportError: + raise ImportError( + "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`."
+ ) + clustering_args = dict(clustering_args) start = logg.info("running Leiden clustering") adata = adata.copy() if copy else adata @@ -133,23 +161,29 @@ def leiden( restrict_categories=restrict_categories, adjacency=adjacency, ) - # convert it to igraph - g = _utils.get_igraph_from_adjacency(adjacency, directed=directed) - # flip to the default partition type if not overriden by the user - if partition_type is None: - partition_type = leidenalg.RBConfigurationVertexPartition # Prepare find_partition arguments as a dictionary, # appending to whatever the user provided. It needs to be this way # as this allows for the accounting of a None resolution # (in the case of a partition variant that doesn't take it on input) - if use_weights: - partition_kwargs["weights"] = np.array(g.es["weight"]).astype(np.float64) - partition_kwargs["n_iterations"] = n_iterations - partition_kwargs["seed"] = random_state + clustering_args["n_iterations"] = n_iterations if resolution is not None: - partition_kwargs["resolution_parameter"] = resolution - # clustering proper - part = leidenalg.find_partition(g, partition_type, **partition_kwargs) + clustering_args["resolution_parameter"] = resolution + if flavor == "leidenalg": + directed = True if directed is None else directed + g = _utils.get_igraph_from_adjacency(adjacency, directed=directed) + if partition_type is None: + partition_type = leidenalg.RBConfigurationVertexPartition + if use_weights: + clustering_args["weights"] = np.array(g.es["weight"]).astype(np.float64) + clustering_args["seed"] = random_state + part = leidenalg.find_partition(g, partition_type, **clustering_args) + else: + g = _utils.get_igraph_from_adjacency(adjacency, directed=False) + if use_weights: + clustering_args["weights"] = "weight" + clustering_args.setdefault("objective_function", "modularity") + with _utils.set_igraph_random_state(random_state): + part = g.community_leiden(**clustering_args) # store output into adata.obs groups = np.array(part.membership) if restrict_to is not None: diff --git a/scanpy/tools/_rank_genes_groups.py b/scanpy/tools/_rank_genes_groups.py index ba9196a842..8e133434bf 100644 --- a/scanpy/tools/_rank_genes_groups.py +++ b/scanpy/tools/_rank_genes_groups.py @@ -582,7 +582,7 @@ def rank_genes_groups( Notes ----- There are slight inconsistencies depending on whether sparse - or dense data are passed. See `here `__. + or dense data are passed. See `here `__. 
Examples -------- diff --git a/scanpy/tools/_score_genes.py b/scanpy/tools/_score_genes.py index 3e31f0cc66..307a162b12 100644 --- a/scanpy/tools/_score_genes.py +++ b/scanpy/tools/_score_genes.py @@ -51,10 +51,10 @@ def _sparse_nanmean(X, axis): ) def score_genes( adata: AnnData, - gene_list: Sequence[str], + gene_list: Sequence[str] | pd.Index[str], *, ctrl_size: int = 50, - gene_pool: Sequence[str] | None = None, + gene_pool: Sequence[str] | pd.Index[str] | None = None, n_bins: int = 25, score_name: str = "score", random_state: AnyRandom = 0, @@ -114,26 +114,20 @@ def score_genes( if random_state is not None: np.random.seed(random_state) - gene_list_in_var = [] var_names = adata.raw.var_names if use_raw else adata.var_names - genes_to_ignore = [] - for gene in gene_list: - if gene in var_names: - gene_list_in_var.append(gene) - else: - genes_to_ignore.append(gene) + gene_list = pd.Index([gene_list] if isinstance(gene_list, str) else gene_list) + genes_to_ignore = gene_list.difference(var_names, sort=False) # first get missing + gene_list = gene_list.intersection(var_names) # then restrict to present if len(genes_to_ignore) > 0: logg.warning(f"genes are not in var_names and ignored: {genes_to_ignore}") - gene_list = set(gene_list_in_var) - if len(gene_list) == 0: raise ValueError("No valid genes were passed for scoring.") if gene_pool is None: - gene_pool = list(var_names) + gene_pool = pd.Index(var_names, dtype="string") else: - gene_pool = [x for x in gene_pool if x in var_names] - if not gene_pool: + gene_pool = pd.Index(gene_pool, dtype="string").intersection(var_names) + if len(gene_pool) == 0: raise ValueError("No valid genes were passed for reference set.") # Trying here to match the Seurat approach in scoring cells. @@ -144,34 +138,28 @@ def score_genes( _adata_subset = ( _adata[:, gene_pool] if len(gene_pool) < len(_adata.var_names) else _adata ) + # average expression of genes if issparse(_adata_subset.X): obs_avg = pd.Series( np.array(_sparse_nanmean(_adata_subset.X, axis=0)).flatten(), index=gene_pool, - ) # average expression of genes + ) else: - obs_avg = pd.Series( - np.nanmean(_adata_subset.X, axis=0), index=gene_pool - ) # average expression of genes + obs_avg = pd.Series(np.nanmean(_adata_subset.X, axis=0), index=gene_pool) - obs_avg = obs_avg[ - np.isfinite(obs_avg) - ] # Sometimes (and I don't know how) missing data may be there, with nansfor + # Sometimes (and I don't know how) missing data may be there, with nansfor + obs_avg = obs_avg[np.isfinite(obs_avg)] n_items = int(np.round(len(obs_avg) / (n_bins - 1))) obs_cut = obs_avg.rank(method="min") // n_items - control_genes = set() + control_genes = pd.Index([], dtype="string") # now pick `ctrl_size` genes from every cut - for cut in np.unique(obs_cut.loc[list(gene_list)]): - r_genes = np.array(obs_cut[obs_cut == cut].index) - np.random.shuffle(r_genes) - # uses full r_genes if ctrl_size > len(r_genes) - control_genes.update(set(r_genes[:ctrl_size])) - - # To index, we need a list – indexing implies an order. 
- control_genes = list(control_genes - gene_list) - gene_list = list(gene_list) + for cut in np.unique(obs_cut.loc[gene_list]): + r_genes: pd.Index[str] = obs_cut[obs_cut == cut].index + if ctrl_size < len(r_genes): + r_genes = r_genes.to_series().sample(ctrl_size).index + control_genes = control_genes.union(r_genes.difference(gene_list)) X_list = _adata[:, gene_list].X if issparse(X_list): diff --git a/scanpy/tools/_umap.py b/scanpy/tools/_umap.py index 6b18438e1b..8105c3abd8 100644 --- a/scanpy/tools/_umap.py +++ b/scanpy/tools/_umap.py @@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, Literal import numpy as np -from packaging import version from sklearn.utils import check_array, check_random_state from .. import logging as logg @@ -165,29 +164,12 @@ def umap( f'.obsp["{neighbors["connectivities_key"]}"] have not been computed using umap' ) - # Compat for umap 0.4 -> 0.5 with warnings.catch_warnings(): # umap 0.5.0 warnings.filterwarnings("ignore", message=r"Tensorflow not installed") import umap - if version.parse(umap.__version__) >= version.parse("0.5.0"): - - def simplicial_set_embedding(*args, **kwargs): - from umap.umap_ import simplicial_set_embedding - - X_umap, _ = simplicial_set_embedding( - *args, - densmap=False, - densmap_kwds={}, - output_dens=False, - **kwargs, - ) - return X_umap - - else: - from umap.umap_ import simplicial_set_embedding - from umap.umap_ import find_ab_params + from umap.umap_ import find_ab_params, simplicial_set_embedding if a is None or b is None: a, b = find_ab_params(spread, min_dist) @@ -222,20 +204,23 @@ def simplicial_set_embedding(*args, **kwargs): # for the init condition in the UMAP embedding default_epochs = 500 if neighbors["connectivities"].shape[0] <= 10000 else 200 n_epochs = default_epochs if maxiter is None else maxiter - X_umap = simplicial_set_embedding( - X, - neighbors["connectivities"].tocoo(), - n_components, - alpha, - a, - b, - gamma, - negative_sample_rate, - n_epochs, - init_coords, - random_state, - neigh_params.get("metric", "euclidean"), - neigh_params.get("metric_kwds", {}), + X_umap, _ = simplicial_set_embedding( + data=X, + graph=neighbors["connectivities"].tocoo(), + n_components=n_components, + initial_alpha=alpha, + a=a, + b=b, + gamma=gamma, + negative_sample_rate=negative_sample_rate, + n_epochs=n_epochs, + init=init_coords, + random_state=random_state, + metric=neigh_params.get("metric", "euclidean"), + metric_kwds=neigh_params.get("metric_kwds", {}), + densmap=False, + densmap_kwds={}, + output_dens=False, verbose=settings.verbosity > 3, ) elif method == "rapids":
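# Editor's note (sketch, not part of the diff): the final hunk above switches scanpy's call to
# umap-learn's simplicial_set_embedding to explicit keyword arguments and unpacks the
# (embedding, aux_data) tuple returned by umap-learn >= 0.5. A rough standalone sketch of the
# same wiring; X, connectivities, init_coords, spread and min_dist stand in for the values
# scanpy derives from the AnnData object and its neighbors/umap settings:
#
#   from sklearn.utils import check_random_state
#   from umap.umap_ import find_ab_params, simplicial_set_embedding
#
#   a, b = find_ab_params(spread, min_dist)
#   X_umap, _ = simplicial_set_embedding(
#       data=X, graph=connectivities.tocoo(), n_components=2, initial_alpha=1.0,
#       a=a, b=b, gamma=1.0, negative_sample_rate=5, n_epochs=200, init=init_coords,
#       random_state=check_random_state(0), metric="euclidean", metric_kwds={},
#       densmap=False, densmap_kwds={}, output_dens=False, verbose=False,
#   )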