Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward-merge branch-25.02 into branch-25.04 #638

Merged
merged 1 commit into from
Feb 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cpp/include/cuvs/neighbors/cagra.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,13 @@ struct search_params : cuvs::neighbors::search_params {
* impact on the throughput.
*/
float persistent_device_usage = 1.0;

/**
* A parameter indicating the fraction of nodes to be filtered out when filtering is used.
* The value must be greater than or equal to 0.0 and less than 1.0. The default value is
* negative, in which case the filtering rate is calculated automatically.
*/
float filtering_rate = -1.0;
};

/**
Expand Down
19 changes: 15 additions & 4 deletions cpp/src/neighbors/cagra.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -336,11 +336,13 @@ void search(raft::resources const& res,
const cuvs::neighbors::filtering::base_filter& sample_filter_ref)
{
try {
using none_filter_type = cuvs::neighbors::filtering::none_sample_filter;
auto& sample_filter = dynamic_cast<const none_filter_type&>(sample_filter_ref);
using none_filter_type = cuvs::neighbors::filtering::none_sample_filter;
auto& sample_filter = dynamic_cast<const none_filter_type&>(sample_filter_ref);
search_params params_copy = params;
if (params.filtering_rate < 0.0) { params_copy.filtering_rate = 0.0; }
auto sample_filter_copy = sample_filter;
return search_with_filtering<T, IdxT, none_filter_type>(
res, params, idx, queries, neighbors, distances, sample_filter_copy);
res, params_copy, idx, queries, neighbors, distances, sample_filter_copy);
return;
} catch (const std::bad_cast&) {
}
Expand All @@ -349,9 +351,18 @@ void search(raft::resources const& res,
auto& sample_filter =
dynamic_cast<const cuvs::neighbors::filtering::bitset_filter<uint32_t, int64_t>&>(
sample_filter_ref);
search_params params_copy = params;
if (params.filtering_rate < 0.0) {
const auto num_set_bits = sample_filter.bitset_view_.count(res);
auto filtering_rate = (float)(idx.data().n_rows() - num_set_bits) / idx.data().n_rows();
const float min_filtering_rate = 0.0;
const float max_filtering_rate = 0.999;
params_copy.filtering_rate =
std::min(std::max(filtering_rate, min_filtering_rate), max_filtering_rate);
}
auto sample_filter_copy = sample_filter;
return search_with_filtering<T, IdxT, decltype(sample_filter_copy)>(
res, params, idx, queries, neighbors, distances, sample_filter_copy);
res, params_copy, idx, queries, neighbors, distances, sample_filter_copy);
} catch (const std::bad_cast&) {
RAFT_FAIL("Unsupported sample filter type");
}
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/neighbors/detail/cagra/search_multi_cta.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,14 @@ struct search : public search_plan_impl<DataT, IndexT, DistanceT, SAMPLE_FILTER_

void set_params(raft::resources const& res, const search_params& params)
{
size_t global_itopk_size = itopk_size;
constexpr unsigned multi_cta_itopk_size = 32;
this->itopk_size = multi_cta_itopk_size;
search_width = 1;
RAFT_LOG_DEBUG("params.itopk_size: %lu", (uint64_t)params.itopk_size);
RAFT_LOG_DEBUG("global_itopk_size: %lu", (uint64_t)global_itopk_size);
num_cta_per_query =
max(params.search_width, raft::ceildiv(params.itopk_size, (size_t)multi_cta_itopk_size));
max(params.search_width, raft::ceildiv(global_itopk_size, (size_t)multi_cta_itopk_size));
result_buffer_size = itopk_size + (search_width * graph_degree);
typedef raft::Pow2<32> AlignBytes;
unsigned result_buffer_size_32 = AlignBytes::roundUp(result_buffer_size);
Expand Down
20 changes: 15 additions & 5 deletions cpp/src/neighbors/detail/cagra/search_plan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,9 @@ struct search_plan_impl_base : public search_params {
if (itopk_size <= 512 && search_params::max_queries >= num_sm * 2lu) {
algo = search_algo::SINGLE_CTA;
RAFT_LOG_DEBUG("Auto strategy: selecting single-cta");
} else if (topk <= 1024) {
} else {
algo = search_algo::MULTI_CTA;
RAFT_LOG_DEBUG("Auto strategy: selecting multi-cta");
} else {
algo = search_algo::MULTI_KERNEL;
RAFT_LOG_DEBUG("Auto strategy: selecting multi kernel");
}
}
}
Expand All @@ -150,7 +147,6 @@ struct search_plan_impl : public search_plan_impl_base {
uint32_t result_buffer_size;

uint32_t smem_size;
uint32_t topk;
uint32_t num_seeds;

lightweight_uvector<INDEX_T> hashmap;
Expand Down Expand Up @@ -217,6 +213,20 @@ struct search_plan_impl : public search_plan_impl_base {
"# max_iterations is increased from %lu to %u.", max_iterations, _max_iterations);
max_iterations = _max_iterations;
}
if (algo == search_algo::MULTI_CTA && (0.0 < filtering_rate && filtering_rate < 1.0)) {
size_t adjusted_itopk_size =
(size_t)((float)topk / (1.0 - filtering_rate) +
(float)(itopk_size - topk) / std::sqrt(1.0 - filtering_rate));
if (adjusted_itopk_size % 32) { adjusted_itopk_size += 32 - (adjusted_itopk_size % 32); }
if (itopk_size < adjusted_itopk_size) {
RAFT_LOG_DEBUG(
"# internal_topk is increased from %lu to %lu, considering fintering rate %f.",
itopk_size,
adjusted_itopk_size,
filtering_rate);
itopk_size = adjusted_itopk_size;
}
}
if (itopk_size % 32) {
uint32_t itopk32 = itopk_size;
itopk32 += 32 - (itopk_size % 32);
Expand Down
5 changes: 2 additions & 3 deletions docs/source/indexes/cagra.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@ while the CPU can be leveraged for search.
Filtering considerations
------------------------

CAGRA supports filtered search, which can work well for moderately small filters (such as filtering out only a small percentage of the vectors in the index, e.g. <<50%).
CAGRA supports filtered search. As of branch-25.02, its multi-CTA algorithm has been improved to provide reasonable recall and performance even at filtering rates of 90% or more.

When a filter is expected to remove 80%-99% of the vectors in the index, it is preferable to use brute-force with pre-filtering instead, as that will compute only the distances
between the vectors that are not filtered out. By default, CAGRA will pass the filter to the pre-filtered brute-force when the number of vectors being filtered out is >90% of the vectors in the index.
To obtain appropriate recall in filtered search, the search parameters need to be set according to the filtering rate. Because this is difficult for users to do, CAGRA automatically adjusts `itopk_size` internally according to the filtering rate on a heuristic basis. If you want to disable this automatic adjustment, set `filtering_rate`, one of the search parameters, to `0.0`, and `itopk_size` will not be adjusted automatically.

Configuration parameters
------------------------
Expand Down
Loading