Skip to content

Commit

Permalink
[Fix] Duplicated node issue on Cagra NN-descent
Browse files Browse the repository at this point in the history
- Fixes issue #626
  • Loading branch information
rhdong committed Feb 5, 2025
1 parent 45703bf commit 26f514b
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 7 deletions.
4 changes: 2 additions & 2 deletions cpp/src/neighbors/detail/cagra/cagra_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -574,10 +574,10 @@ index<T, IdxT> build(
size_t intermediate_degree = params.intermediate_graph_degree;
size_t graph_degree = params.graph_degree;
if (intermediate_degree >= static_cast<size_t>(dataset.extent(0))) {
intermediate_degree = dataset.extent(0) - 1;
RAFT_LOG_WARN(
"Intermediate graph degree cannot be larger than dataset size, reducing it to %lu",
dataset.extent(0));
intermediate_degree = dataset.extent(0) - 1;
intermediate_degree);
}
if (intermediate_degree < graph_degree) {
RAFT_LOG_WARN(
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/neighbors/detail/cagra/graph_core.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1242,8 +1242,10 @@ void optimize(
}
if (pk != output_graph_degree) {
RAFT_LOG_DEBUG(
"Couldn't find the output_graph_degree (%lu) smallest detourable count nodes for "
"Couldn't find the output_graph_degree (%lu vs pk: %lu) smallest detourable count nodes "
"for "
"node %lu in the rank-based node reranking process",
pk,
output_graph_degree,
i);
invalid_neighbor_list = true;
Expand Down
16 changes: 13 additions & 3 deletions cpp/src/neighbors/detail/nn_descent.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,13 @@ int insert_to_ordered_list(InternalID_t<Index_t>* list,
int idx_insert = width;
bool position_found = false;
for (int i = 0; i < width; i++) {
if (list[i].id() == neighb_id.id()) { return width; }
if (list[i].id() == neighb_id.id()) {
if (dist_list[i] == std::numeric_limits<DistData_t>::max() && dist != dist_list[i]) {
idx_insert = i;
dist_list[i] = dist;
}
return idx_insert;
}
if (!position_found && dist_list[i] > dist) {
idx_insert = i;
position_found = true;
Expand Down Expand Up @@ -1044,6 +1050,7 @@ void GnndGraph<Index_t>::sample_graph_new(InternalID_t<Index_t>* new_neighbors,
template <typename Index_t>
void GnndGraph<Index_t>::init_random_graph()
{
const bool small_dataset = (nrow <= 4 * segment_size);
for (size_t seg_idx = 0; seg_idx < static_cast<size_t>(num_segments); seg_idx++) {
// random sequence (range: 0~nrow)
// segment_x stores neighbors which id % num_segments == x
Expand All @@ -1057,14 +1064,17 @@ void GnndGraph<Index_t>::init_random_graph()
size_t base_idx = i * node_degree + seg_idx * segment_size;
auto h_neighbor_list = h_graph + base_idx;
auto h_dist_list = h_dists.data_handle() + base_idx;
size_t idx = base_idx;
for (size_t j = 0; j < static_cast<size_t>(segment_size); j++) {
size_t idx = base_idx + j;
Index_t id = rand_seq[idx % rand_seq.size()] * num_segments + seg_idx;
if ((size_t)id == i) {
id = rand_seq[(idx + segment_size) % rand_seq.size()] * num_segments + seg_idx;
idx = small_dataset ? (idx + 1) : (idx + segment_size);
id = rand_seq[idx % rand_seq.size()] * num_segments + seg_idx;
}

h_neighbor_list[j].id_with_flag() = id;
h_dist_list[j] = std::numeric_limits<DistData_t>::max();
idx++;
}
}
}
Expand Down
20 changes: 19 additions & 1 deletion cpp/tests/neighbors/ann_cagra.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -944,8 +944,26 @@ inline std::vector<AnnCagraInputs> generate_inputs()
{true},
{0.995});

// Corner case for small dataset
auto inputs = raft::util::itertools::product<AnnCagraInputs>(
{2},
{3, 31, 32, 64, 101},
{1, 128},
{2, 3}, // k
{graph_build_algo::NN_DESCENT},
{search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL},
{0}, // query size
{0},
{256},
{1},
{cuvs::distance::DistanceType::L2Expanded},
{false},
{true},
{0.995});
inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());

// Fixed dim, and changing neighbors and query size (output matrix size)
auto inputs2 = raft::util::itertools::product<AnnCagraInputs>(
inputs2 = raft::util::itertools::product<AnnCagraInputs>(
{1, 100},
{1000},
{8},
Expand Down

0 comments on commit 26f514b

Please sign in to comment.