Merge branch 'branch-25.02' into nn_descent_python

rapidsai · Feb 4, 2025 · a3f3d15
2 parents cb7857e + 8c683b0
commit a3f3d15
Show file tree

Hide file tree

Showing 37 changed files with 999 additions and 138 deletions.
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -208,6 +208,9 @@ if(BUILD_SHARED_LIBS)
     src/neighbors/cagra_search_int8.cu
     src/neighbors/cagra_search_uint8.cu
     src/neighbors/detail/cagra/compute_distance.cu
+    src/neighbors/detail/cagra/compute_distance_standard_BitwiseHamming_u8_uint32_dim128_t8.cu
+    src/neighbors/detail/cagra/compute_distance_standard_BitwiseHamming_u8_uint32_dim256_t16.cu
+    src/neighbors/detail/cagra/compute_distance_standard_BitwiseHamming_u8_uint32_dim512_t32.cu
     src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim128_t8.cu
     src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim256_t16.cu
     src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim512_t32.cu
@@ -469,6 +472,7 @@ if(BUILD_SHARED_LIBS)
     src/neighbors/vamana_serialize_uint8.cu
     src/neighbors/vamana_serialize_int8.cu
     src/preprocessing/quantize/scalar.cu
+    src/preprocessing/quantize/binary.cu
     src/selection/select_k_float_int64_t.cu
     src/selection/select_k_float_int32_t.cu
     src/selection/select_k_float_uint32_t.cu

diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp
@@ -519,12 +519,10 @@ void register_search(std::shared_ptr<const dataset<T>> dataset,
 
 template <typename T>
 void dispatch_benchmark(std::string cmdline,
-                        const configuration& conf,
+                        configuration& conf,
                         bool force_overwrite,
                         bool build_mode,
                         bool search_mode,
-                        std::string data_prefix,
-                        std::string index_prefix,
                         kv_series override_kv,
                         Mode metric_objective,
                         const std::vector<int>& threads,
@@ -539,11 +537,10 @@ void dispatch_benchmark(std::string cmdline,
       ::benchmark::AddCustomContext(key, value);
     }
   }
-  const auto dataset_conf = conf.get_dataset_conf();
-  auto base_file          = combine_path(data_prefix, dataset_conf.base_file);
-  auto query_file         = combine_path(data_prefix, dataset_conf.query_file);
-  auto gt_file            = dataset_conf.groundtruth_neighbors_file;
-  if (gt_file.has_value()) { gt_file.emplace(combine_path(data_prefix, gt_file.value())); }
+  auto& dataset_conf = conf.get_dataset_conf();
+  auto base_file     = dataset_conf.base_file;
+  auto query_file    = dataset_conf.query_file;
+  auto gt_file       = dataset_conf.groundtruth_neighbors_file;
   auto dataset =
     std::make_shared<bench::dataset<T>>(dataset_conf.name,
                                         base_file,
@@ -555,7 +552,7 @@ void dispatch_benchmark(std::string cmdline,
                                         search_mode ? dataset_conf.filtering_rate : std::nullopt);
   ::benchmark::AddCustomContext("dataset", dataset_conf.name);
   ::benchmark::AddCustomContext("distance", dataset_conf.distance);
-  std::vector<configuration::index> indices = conf.get_indices();
+  std::vector<configuration::index>& indices = conf.get_indices();
   if (build_mode) {
     if (file_exists(base_file)) {
       log_info("Using the dataset file '%s'", base_file.c_str());
@@ -570,10 +567,10 @@ void dispatch_benchmark(std::string cmdline,
       for (auto param : apply_overrides(index.build_param, override_kv)) {
         auto modified_index        = index;
         modified_index.build_param = param;
-        modified_index.file        = combine_path(index_prefix, modified_index.file);
         more_indices.push_back(modified_index);
       }
     }
+    std::swap(more_indices, indices);  // update the config in case algorithms need to access it
     register_build<T>(dataset, more_indices, force_overwrite, no_lap_sync);
   } else if (search_mode) {
     if (file_exists(query_file)) {
@@ -601,7 +598,6 @@ void dispatch_benchmark(std::string cmdline,
     }
     for (auto& index : indices) {
       index.search_params = apply_overrides(index.search_params, override_kv);
-      index.file          = combine_path(index_prefix, index.file);
     }
     register_search<T>(dataset, indices, metric_objective, threads, no_lap_sync);
   }
@@ -726,7 +722,7 @@ inline auto run_main(int argc, char** argv) -> int
     log_warn("cudart library is not found, GPU-based indices won't work.");
   }
 
-  configuration conf(conf_stream);
+  auto& conf        = bench::configuration::initialize(conf_stream, data_prefix, index_prefix);
   std::string dtype = conf.get_dataset_conf().dtype;
 
   if (dtype == "float") {
@@ -735,8 +731,6 @@ inline auto run_main(int argc, char** argv) -> int
                               force_overwrite,
                               build_mode,
                               search_mode,
-                              data_prefix,
-                              index_prefix,
                               override_kv,
                               metric_objective,
                               threads,
@@ -747,8 +741,6 @@ inline auto run_main(int argc, char** argv) -> int
                              force_overwrite,
                              build_mode,
                              search_mode,
-                             data_prefix,
-                             index_prefix,
                              override_kv,
                              metric_objective,
                              threads,
@@ -759,8 +751,6 @@ inline auto run_main(int argc, char** argv) -> int
                                      force_overwrite,
                                      build_mode,
                                      search_mode,
-                                     data_prefix,
-                                     index_prefix,
                                      override_kv,
                                      metric_objective,
                                      threads,
@@ -771,8 +761,6 @@ inline auto run_main(int argc, char** argv) -> int
                                     force_overwrite,
                                     build_mode,
                                     search_mode,
-                                    data_prefix,
-                                    index_prefix,
                                     override_kv,
                                     metric_objective,
                                     threads,

diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp
@@ -57,31 +57,53 @@ class configuration {
     std::optional<double> filtering_rate{std::nullopt};
   };
 
-  explicit inline configuration(std::istream& conf_stream)
+  [[nodiscard]] inline auto get_dataset_conf() const -> const dataset_conf&
+  {
+    return dataset_conf_;
+  }
+  [[nodiscard]] inline auto get_dataset_conf() -> dataset_conf& { return dataset_conf_; }
+  [[nodiscard]] inline auto get_indices() const -> const std::vector<index>& { return indices_; };
+  [[nodiscard]] inline auto get_indices() -> std::vector<index>& { return indices_; };
+
+  /** The benchmark initializes the configuration once and has a chance to modify it during the
+   * setup. */
+  static inline auto initialize(std::istream& conf_stream,
+                                std::string data_prefix,
+                                std::string index_prefix) -> configuration&
+  {
+    singleton_ =
+      std::unique_ptr<configuration>(new configuration{conf_stream, data_prefix, index_prefix});
+    return *singleton_;
+  }
+
+  /** Any algorithm can access the benchmark configuration as an immutable context. */
+  [[nodiscard]] static inline auto singleton() -> const configuration& { return *singleton_; }
+
+ private:
+  explicit inline configuration(std::istream& conf_stream,
+                                std::string data_prefix,
+                                std::string index_prefix)
   {
     // to enable comments in json
     auto conf = nlohmann::json::parse(conf_stream, nullptr, true, true);
 
-    parse_dataset(conf.at("dataset"));
-    parse_index(conf.at("index"), conf.at("search_basic_param"));
+    parse_dataset(conf.at("dataset"), data_prefix);
+    parse_index(conf.at("index"), conf.at("search_basic_param"), index_prefix);
   }
 
-  [[nodiscard]] inline auto get_dataset_conf() const -> dataset_conf { return dataset_conf_; }
-  [[nodiscard]] inline auto get_indices() const -> std::vector<index> { return indices_; };
-
- private:
-  inline void parse_dataset(const nlohmann::json& conf)
+  inline void parse_dataset(const nlohmann::json& conf, std::string data_prefix)
   {
     dataset_conf_.name       = conf.at("name");
-    dataset_conf_.base_file  = conf.at("base_file");
-    dataset_conf_.query_file = conf.at("query_file");
+    dataset_conf_.base_file  = combine_path(data_prefix, conf.at("base_file"));
+    dataset_conf_.query_file = combine_path(data_prefix, conf.at("query_file"));
     dataset_conf_.distance   = conf.at("distance");
     if (conf.contains("filtering_rate")) {
       dataset_conf_.filtering_rate.emplace(conf.at("filtering_rate"));
     }
 
     if (conf.contains("groundtruth_neighbors_file")) {
-      dataset_conf_.groundtruth_neighbors_file = conf.at("groundtruth_neighbors_file");
+      dataset_conf_.groundtruth_neighbors_file =
+        combine_path(data_prefix, conf.at("groundtruth_neighbors_file"));
     }
     if (conf.contains("subset_first_row")) {
       dataset_conf_.subset_first_row = conf.at("subset_first_row");
@@ -108,7 +130,9 @@ class configuration {
       }
     }
   }
-  inline void parse_index(const nlohmann::json& index_conf, const nlohmann::json& search_basic_conf)
+  inline void parse_index(const nlohmann::json& index_conf,
+                          const nlohmann::json& search_basic_conf,
+                          std::string index_prefix)
   {
     const int batch_size = search_basic_conf.at("batch_size");
     const int k          = search_basic_conf.at("k");
@@ -118,7 +142,7 @@ class configuration {
       index.name        = conf.at("name");
       index.algo        = conf.at("algo");
       index.build_param = conf.at("build_param");
-      index.file        = conf.at("file");
+      index.file        = combine_path(index_prefix, conf.at("file"));
       index.batch_size  = batch_size;
       index.k           = k;
 
@@ -147,6 +171,8 @@ class configuration {
 
   dataset_conf dataset_conf_;
   std::vector<index> indices_;
+
+  static inline std::unique_ptr<configuration> singleton_ = nullptr;
 };
 
 }  // namespace cuvs::bench
diff --git a/cpp/include/cuvs/distance/distance.h b/cpp/include/cuvs/distance/distance.h
@@ -62,6 +62,8 @@ typedef enum {
   RusselRaoExpanded = 18,
   /** Dice-Sorensen distance **/
   DiceExpanded = 19,
+  /** Bitstring Hamming distance **/
+  BitwiseHamming = 20,
   /** Precomputed (special value) **/
   Precomputed = 100
 } cuvsDistanceType;

diff --git a/cpp/include/cuvs/neighbors/brute_force.h b/cpp/include/cuvs/neighbors/brute_force.h
@@ -68,8 +68,7 @@ cuvsError_t cuvsBruteForceIndexDestroy(cuvsBruteForceIndex_t index);
  *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
  *        or `kDLCPU`. Also, acceptable underlying types are:
  *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -120,7 +119,8 @@ cuvsError_t cuvsBruteForceBuild(cuvsResources_t res,
  *        It is also important to note that the BRUTEFORCE index must have been built
  *        with the same type of `queries`, such that `index.dtype.code ==
  *        queries.dl_tensor.dtype.code` Types for input are:
- *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or
+ *          `kDLDataType.bits = 16`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
  *

diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h
@@ -333,8 +333,9 @@ cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int* dim);
  *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
  *        or `kDLCPU`. Also, acceptable underlying types are:
  *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *        3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *        4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -421,8 +422,9 @@ cuvsError_t cuvsCagraExtend(cuvsResources_t res,
  * queries.dl_tensor.dtype.code` Types for input are:
  *        1. `queries`:
  *          a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *          b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *          c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *          b. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *          c. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *          d. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
  *

diff --git a/cpp/include/cuvs/neighbors/ivf_pq.h b/cpp/include/cuvs/neighbors/ivf_pq.h
@@ -258,8 +258,9 @@ cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index);
  *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
  *        or `kDLCPU`. Also, acceptable underlying types are:
  *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *        3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *        4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -314,6 +315,7 @@ cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
  *        with the same type of `queries`, such that `index.dtype.code ==
  * queries.dl_tensor.dtype.code` Types for input are:
  *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *            or `kDLDataType.bits = 16`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
  *