Skip to content

Commit

Permalink
add/test/doc counterpart of Python AnnData's obs_names_make_unique
Browse files Browse the repository at this point in the history
  • Loading branch information
jdm204 committed Feb 4, 2025
1 parent 908b96b commit 471761a
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 6 deletions.
6 changes: 6 additions & 0 deletions docs/src/objects.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ Make duplicate `var_names` unique by appending a numbered suffix.
var_names_make_unique!(ad)
```

Similarly, duplicate `obs_names` can be made unique:

```@example 1
obs_names_make_unique!(ad)
```

## MuData

The basic idea behind a multimodal object is a _key_ ``\rightarrow`` _value_ relationship, where _keys_ represent the unique names of individual modalities and _values_ are `AnnData` objects that contain the corresponding data. Similarly to `AnnData` objects, `MuData` objects can also contain rich multimodal annotations.
Expand Down
2 changes: 1 addition & 1 deletion src/Muon.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ using FileIO

export readh5mu, readh5ad, writeh5mu, writeh5ad, isbacked, update_obs!, update_var!, update!
export AnnData, MuData
export var_names_make_unique!
export var_names_make_unique!, obs_names_make_unique!

import Pkg
# this executes only during precompilation
Expand Down
22 changes: 18 additions & 4 deletions src/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,28 +122,42 @@ Make `A.var_names` unique by appending `join` and sequential numbers
(1, 2, 3 etc) to duplicate elements, leaving the first unchanged.
"""
function var_names_make_unique!(A::AnnData, join='-')
    # All duplicate detection and renaming happens in the shared helper, which
    # mutates `A.var_names` in place; nothing needs to be precomputed here.
    return index_make_unique!(A.var_names, join)
end

"""
obs_names_make_unique!(A::AnnData, join = '-')
Make `A.obs_names` unique by appending `join` and sequential numbers
(1, 2, 3 etc) to duplicate elements, leaving the first unchanged.
"""
function obs_names_make_unique!(A::AnnData, join='-')
    # Hand the observation-name index to the shared helper, which renames
    # duplicates in place using `join` followed by a running number.
    names = A.obs_names
    return index_make_unique!(names, join)
end

function index_make_unique!(index, join)
duplicates = duplicateindicies(index)

if isempty(duplicates)
@info "var names are already unique, doing nothing"
return nothing
end

example_colliding_names = []
set = Set(A.var_names)
set = Set(index)

for (name, positions) in duplicates
i = 1
for pos in Iterators.rest(positions, 2)
while true
potential = string(A.var_names[pos], join, i)
potential = string(index[pos], join, i)
i += 1
if potential in set
if length(example_colliding_names) <= 5
push!(example_colliding_names, potential)
end
else
A.var_names[pos] = potential
index[pos] = potential
push!(set, potential)
break
end
Expand Down
11 changes: 10 additions & 1 deletion test/anndata.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,24 @@ end
test_ad_slicing(subad, 50, 5, x[i, 3:7])
end

@testset "anndata functions" begin
@testset "unique names" begin
    # `ad` is expected to have unique names at this point, so each call should
    # only emit the info-level "already unique" message.
    @test_logs (:info,) var_names_make_unique!(ad)
    @test_logs (:info,) obs_names_make_unique!(ad)

    ad2 = deepcopy(ad)
    # Inject one duplicate per axis. These must be assignments: a bare `==`
    # comparison is discarded and would leave the names unique, so the checks
    # below would pass without de-duplication ever being exercised.
    # NOTE(review): assumes `ad` already has a var named "10" and an obs named
    # "obs_30" at other positions - confirm against the fixture.
    ad2.var_names[3] = "10"
    ad2.obs_names[90] = "obs_30"
    var_names_make_unique!(ad2)
    obs_names_make_unique!(ad2)
    @test allunique(ad2.var_names)
    @test allunique(ad2.obs_names)

    # Force a collision: the first candidate suffix ("10-1" / "obs_10-1") is
    # already present, so the functions must skip to the next number. The tests
    # expect a warning to be logged in that case.
    ad2.var_names[10] = "10-1"
    ad2.var_names[3] = "10"
    ad2.var_names[4] = "10"
    ad2.obs_names[11] = "obs_10-1"
    ad2.obs_names[10] = "obs_10"
    ad2.obs_names[9] = "obs_10"
    @test_logs (:warn,) var_names_make_unique!(ad2)
    @test_logs (:warn,) obs_names_make_unique!(ad2)
    @test allunique(ad2.var_names)
    @test allunique(ad2.obs_names)
end

0 comments on commit 471761a

Please sign in to comment.