-
Notifications
You must be signed in to change notification settings - Fork 616
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
(feat): igraph leiden implementation now included as an option in sc.tl.leiden
#2815
Changes from 65 commits
eba6a9a
519cad3
25b6705
00f5904
7f46900
e306ac3
642235d
5439d9d
2449148
2fe2b9a
a14b13e
8f3b169
b89eaa0
f67225d
202787c
d738092
2d8ab25
ece40bf
b24d1c4
a4aebfd
4fcbcc6
7a75fdf
a79e00c
fd748f4
be32bc2
07ffc84
fbd2173
214eaa4
345fcf4
1f35a00
ac75b6b
488ea75
3db3bb5
540a204
d3afd43
0dbfe7b
26e6540
0bcb2b7
d583619
cf8449c
6a306e8
3cdd337
60c53eb
bf02b53
8833246
015a2ac
3cf18f7
e903794
3dc2d95
0ba3a04
bd8382d
6958e7d
4f736df
eb070d6
4b8c823
66cc1e2
f2fc12b
5c49c56
04cd7f9
16d822c
bca86ff
dd540dc
b9b5b19
3482560
b3bb3d2
ebe1c16
0736afc
de7af6a
99aa47c
726c59e
f2da795
13ae6b6
cc31a2e
935d34f
662f918
5df37d2
51f0a02
9f6b535
102d128
84dd615
d6b1dff
f2db271
5e09532
579f005
1cefa19
6247d76
2549f61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,11 +28,10 @@ | |
|
||
@needs.leidenalg | ||
def test_pbmc3k(image_comparer): | ||
# ensure violin plots and other non-deterministic plots have deterministic behavior | ||
np.random.seed(0) | ||
save_and_compare_images = partial(image_comparer, ROOT, tol=20) | ||
|
||
adata = sc.read( | ||
"./data/pbmc3k_raw.h5ad", backup_url="https://falexwolf.de/data/pbmc3k_raw.h5ad" | ||
) | ||
adata = sc.datasets.pbmc3k() | ||
|
||
# Preprocessing | ||
|
||
|
@@ -105,13 +104,41 @@ def test_pbmc3k(image_comparer): | |
|
||
# Clustering the graph | ||
|
||
sc.tl.leiden(adata, resolution=0.9) | ||
# sc.pl.umap(adata, color=['leiden', 'CST3', 'NKG7'], show=False) | ||
# save_and_compare_images('umap_2') | ||
sc.tl.leiden(adata, resolution=0.9, random_state=0) | ||
|
||
# sc.pl.umap(adata, color=["leiden", "CST3", "NKG7"], show=False) | ||
# save_and_compare_images("umap_2") | ||
sc.pl.scatter(adata, "CST3", "NKG7", color="leiden", show=False) | ||
save_and_compare_images("scatter_3") | ||
|
||
# Finding marker genes | ||
# Due to inconsistency with our test runner vs local, these clusters need to | ||
# be pre-annotated as the numbers for each cluster are not consistent. | ||
marker_genes = [ | ||
"RP11-18H21.1", | ||
"GZMK", | ||
"CD79A", | ||
"FCGR3A", | ||
"GNLY", | ||
"S100A8", | ||
"FCER1A", | ||
"PPBP", | ||
] | ||
new_labels = ["0", "1", "2", "3", "4", "5", "6", "7"] | ||
data_df = adata[:, marker_genes].to_df() | ||
data_df["leiden"] = adata.obs["leiden"] | ||
max_idxs = data_df.groupby("leiden", observed=True).mean().idxmax() | ||
leiden_relabel = {} | ||
for marker_gene, new_label in zip(marker_genes, new_labels): | ||
leiden_relabel[max_idxs[marker_gene]] = new_label | ||
adata.obs["leiden_old"] = adata.obs["leiden"].copy() | ||
adata.rename_categories( | ||
"leiden", [leiden_relabel[key] for key in sorted(leiden_relabel.keys())] | ||
) | ||
# ensure that the column can be sorted for consistent plotting since it is by default unordered | ||
adata.obs["leiden"] = adata.obs["leiden"].cat.reorder_categories( | ||
list(map(str, range(len(adata.obs["leiden"].cat.categories)))), ordered=True | ||
) | ||
|
||
sc.tl.rank_genes_groups(adata, "leiden") | ||
sc.pl.rank_genes_groups(adata, n_genes=20, sharey=False, show=False) | ||
|
@@ -132,23 +159,22 @@ def test_pbmc3k(image_comparer): | |
if adata[adata.obs["leiden"] == "4", "CST3"].X.mean() < 1: | ||
( # switch clusters | ||
adata.obs["leiden"][adata.obs["leiden"] == "4"], | ||
adata.obs["leiden"][adata.obs["leiden"] == "5"], | ||
) = ("5", "4") | ||
adata.obs["leiden"][adata.obs["leiden"] == "3"], | ||
) = ("3", "4") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you manually order them in the first place, why reorder here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't want to touch whatever this is doing since it's based on mean expression. |
||
new_cluster_names = [ | ||
"CD4 T cells", | ||
"CD14+ Monocytes", | ||
"B cells", | ||
"CD8 T cells", | ||
"NK cells", | ||
"B cells", | ||
"FCGR3A+ Monocytes", | ||
"NK cells", | ||
"CD14+ Monocytes", | ||
"Dendritic cells", | ||
"Megakaryocytes", | ||
] | ||
adata.rename_categories("leiden", new_cluster_names) | ||
|
||
# sc.pl.umap(adata, color='leiden', legend_loc='on data', title='', frameon=False, show=False) | ||
# save_and_compare_images('umap_3') | ||
|
||
sc.pl.violin( | ||
adata, ["CST3", "NKG7", "PPBP"], groupby="leiden", rotation=90, show=False | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No reason not to do this