Skip to content

Commit

Permalink
Add {obs,var}_names_make_unique! (#30)
Browse files Browse the repository at this point in the history
* add counterpart of Python AnnData's var_names_make_unique

* test counterpart of Python AnnData's var_names_make_unique

* document counterpart of Python AnnData's var_names_make_unique

* add/test/doc counterpart of Python AnnData's obs_names_make_unique

* try to fix Logging compat

* try to fix make_unique docs examples

* fix: typo in duplicateindices

* fixup! fix: typo in duplicateindices

---------

Co-authored-by: jdm204 <[email protected]>
  • Loading branch information
jdm204 and jdm204 authored Feb 10, 2025
1 parent b6cd389 commit 284597a
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 2 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ CompressHashDisplace = "0.1.2"
DataFrames = "0.22, 1"
FileIO = "1.6"
HDF5 = "0.16 - 0.99, 1"
Logging = "1.6.7"
OrderedCollections = "1.6"
PooledArrays = "1"
StructArrays = "0.6.4"
julia = "1.5"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"

[targets]
test = ["Test"]
test = ["Test", "Logging"]
16 changes: 16 additions & 0 deletions docs/src/objects.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,22 @@ ad_sub[[true,false,true],:]
ad_sub[[1,3],:]
```

### Operations on AnnData Objects

Make duplicate `var_names` unique by appending a numbered suffix.

```@example 1
import Muon: var_names_make_unique! # hide
var_names_make_unique!(ad)
```

Similarly, we can also make `obs_names` unique.

```@example 1
import Muon: obs_names_make_unique! # hide
obs_names_make_unique!(ad)
```

## MuData

The basic idea behind a multimodal object is a _key_ ``\rightarrow`` _value_ relationship where _keys_ represent the unique names of individual modalities and _values_ are `AnnData` objects that contain the corresponding data. Similarly to `AnnData` objects, `MuData` objects can also contain rich multimodal annotations.
Expand Down
1 change: 1 addition & 0 deletions src/Muon.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ using FileIO

export readh5mu, readh5ad, writeh5mu, writeh5ad, isbacked, update_obs!, update_var!, update!
export AnnData, MuData
export var_names_make_unique!, obs_names_make_unique!

import Pkg
# this executes only during precompilation
Expand Down
75 changes: 75 additions & 0 deletions src/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,78 @@ Base.firstindex(A::Union{AbstractMuData, AbstractAnnData}, d::Integer) = 1
Base.lastindex(A::Union{AbstractMuData, AbstractAnnData}, d::Integer) = size(A, d)

Base.copy(d::Union{MuDataView, AnnDataView}) = parent(d)[parentindices(d)...]

"""
var_names_make_unique!(A::AnnData, join = '-')
Make `A.var_names` unique by appending `join` and sequential numbers
(1, 2, 3 etc) to duplicate elements, leaving the first unchanged.
"""
function var_names_make_unique!(A::AnnData, join='-')
index_make_unique!(A.var_names, join)
end

"""
obs_names_make_unique!(A::AnnData, join = '-')
Make `A.obs_names` unique by appending `join` and sequential numbers
(1, 2, 3 etc) to duplicate elements, leaving the first unchanged.
"""
function obs_names_make_unique!(A::AnnData, join='-')
index_make_unique!(A.obs_names, join)
end

"""
    index_make_unique!(index, join)

Make the entries of `index` unique in place and return `nothing`.

For every name that occurs more than once (as reported by `duplicateindices`),
the first occurrence is left unchanged and each later occurrence is replaced by
`string(name, join, i)`, where `i` counts upwards from 1 and skips any candidate
that is already present in `index`.

Logs an `@info` record when there is nothing to do, and an `@warn` record when a
generated candidate collided with a name that was already in use.
"""
function index_make_unique!(index, join)
    duplicates = duplicateindices(index)

    if isempty(duplicates)
        @info "names are already unique, doing nothing"
        return nothing
    end

    # a few colliding candidates are remembered so the warning can show examples
    example_colliding_names = String[]
    # every name currently in use, including the ones generated below
    taken = Set(index)

    for (name, positions) in duplicates
        # the counter is shared by all occurrences of `name`, so suffixes keep increasing
        suffix = 1
        # the first occurrence keeps its name; only the later ones are renamed
        for pos in Iterators.drop(positions, 1)
            while true
                candidate = string(name, join, suffix)
                suffix += 1
                if candidate in taken
                    if length(example_colliding_names) <= 5
                        push!(example_colliding_names, candidate)
                    end
                else
                    index[pos] = candidate
                    push!(taken, candidate)
                    break
                end
            end
        end
    end

    if !isempty(example_colliding_names)
        @warn """
        Appending $(join)[1-9...] to duplicates caused collision with another name.
        Example(s): $example_colliding_names
        This may make the names hard to interpret.
        Consider setting a different delimiter with the `join` argument.
        """
    end
    return nothing
end

"""
    duplicateindices(v::Muon.Index)

Return a `Dict` that maps every value occurring more than once in `v` to the
vector of positions at which it occurs, in the order produced by `eachindex(v)`.
Values that occur only once are not included.
"""
function duplicateindices(v::Muon.Index{T, I}) where {T <: AbstractString, I <: Integer}
    occurrences = Dict{T, Vector{Int64}}()

    # group positions by value, creating the position list on first sight
    for pos in eachindex(v)
        slots = get!(() -> Int64[], occurrences, v[pos])
        push!(slots, pos)
    end

    # keep only the values that were seen at more than one position
    return filter!(entry -> length(entry.second) > 1, occurrences)
end
22 changes: 22 additions & 0 deletions test/anndata.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,25 @@ end
@test copy(adview).X == subad.X
test_ad_slicing(subad, 50, 5, x[i, 3:7])
end

@testset "unique names" begin
    # names that are already unique only produce an info record
    @test_logs (:info,) var_names_make_unique!(ad)
    @test_logs (:info,) obs_names_make_unique!(ad)

    # introduce one duplicate per axis; these must be assignments (`=`), a
    # comparison (`==`) would leave the names unique and make the checks vacuous
    ad2 = deepcopy(ad)
    ad2.var_names[3] = "10"
    ad2.obs_names[90] = "obs_30"
    var_names_make_unique!(ad2)
    obs_names_make_unique!(ad2)
    @test allunique(ad2.var_names)
    @test allunique(ad2.obs_names)

    # duplicates whose first candidate ("<name>-1") is already taken must warn
    # and still end up unique
    ad2.var_names[10] = "10-1"
    ad2.var_names[3] = "10"
    ad2.var_names[4] = "10"
    ad2.obs_names[11] = "obs_10-1"
    ad2.obs_names[10] = "obs_10"
    ad2.obs_names[9] = "obs_10"
    @test_logs (:warn,) var_names_make_unique!(ad2)
    @test_logs (:warn,) obs_names_make_unique!(ad2)
    @test allunique(ad2.var_names)
    @test allunique(ad2.obs_names)
end
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using Muon, Test
using Muon, Test, Logging

@testset "Index" begin
include("index.jl")
Expand Down

0 comments on commit 284597a

Please sign in to comment.