Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[experiment] ENH: using only raw inputs for onedal backend #2153

Draft
wants to merge 55 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
daed528
ENH: using only raw inputs for onedal backend
samir-nasibli Nov 5, 2024
1be2ffb
minor fix
samir-nasibli Nov 5, 2024
a23b677
lint
samir-nasibli Nov 5, 2024
664e140
fix use_raw_input True/False with dpctl tensor on device
ahuber21 Nov 5, 2024
518dceb
Add hacks to kmeans
ahuber21 Nov 5, 2024
df9d930
Basic statistics online
samir-nasibli Nov 5, 2024
2954913
Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci…
samir-nasibli Nov 5, 2024
3ef345c
Covariance support
ethanglaser Nov 5, 2024
f1c9233
Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci…
ethanglaser Nov 5, 2024
66d7b2d
DBSCAN support
samir-nasibli Nov 5, 2024
c5d26a4
Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci…
samir-nasibli Nov 5, 2024
1350c10
minor fix for dbscan
samir-nasibli Nov 5, 2024
8aaaa70
minor fix for DBSCAN
samir-nasibli Nov 5, 2024
f0d92ae
Apply raw input for batch linear and logistic regression
Alexsandruss Nov 5, 2024
3b58beb
Apply linters
Alexsandruss Nov 5, 2024
d7f2c3c
fix for DBSCAN
samir-nasibli Nov 5, 2024
1aca420
support for Random Forest
samir-nasibli Nov 5, 2024
362930a
PCA support (batch)
ethanglaser Nov 5, 2024
bc37391
Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci…
ethanglaser Nov 5, 2024
102dcae
minor fix for dbscan and rf
samir-nasibli Nov 5, 2024
6edab5b
fully fixed DBSCAN
samir-nasibli Nov 6, 2024
e153a28
Add Incremental Linear Regression
Alexsandruss Nov 6, 2024
37d32c9
Linting
Alexsandruss Nov 6, 2024
71c5135
add modification to knn
ahuber21 Nov 6, 2024
db9f021
minor update for RF
samir-nasibli Nov 6, 2024
bc353da
fix for RandomForestClassifier
samir-nasibli Nov 7, 2024
e873205
minor for RF
samir-nasibli Nov 7, 2024
fe3222a
Update online algos
olegkkruglov Nov 7, 2024
5b3ad17
Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci…
samir-nasibli Nov 7, 2024
eaaab32
fix for RF regressor
samir-nasibli Nov 7, 2024
a7f0c2d
fix workaround for knn
ahuber21 Nov 7, 2024
d9a2966
kmeans predict support
ethanglaser Nov 12, 2024
3562c69
Merge remote-tracking branch 'origin/main' into enh/raw_inputs
ahuber21 Dec 16, 2024
42c3614
fix merge errors
ahuber21 Dec 16, 2024
53bcc7b
fix some tests
ahuber21 Dec 17, 2024
9964c5a
fixup
ahuber21 Dec 17, 2024
84afb62
undo more changes that broke tests
ahuber21 Dec 17, 2024
cf5b736
format
ahuber21 Dec 17, 2024
92393b9
restore original behavior when running without raw inputs
ahuber21 Dec 18, 2024
13471e5
restore original behavior when running without raw inputs
ahuber21 Dec 18, 2024
a8f3f19
align code
ahuber21 Dec 18, 2024
2b07c00
restore original from_table
ahuber21 Dec 19, 2024
6104736
add use_raw_input tests for incremental covariance
ahuber21 Dec 19, 2024
df03233
Add basic statistics testing
ahuber21 Dec 19, 2024
8a166b7
add incremental basic statistics
ahuber21 Dec 19, 2024
fb5f5fa
add dbscan
ahuber21 Dec 19, 2024
7072041
Merge remote-tracking branch 'origin/main' into dev/ahuber/raw-inputs…
ahuber21 Dec 19, 2024
91384ed
add kmeans
ahuber21 Dec 20, 2024
6dec57d
add covariance
ahuber21 Dec 20, 2024
529a7b8
align get_config() import and use_raw_input retrieval
ahuber21 Dec 20, 2024
9f78cbd
add incremental_pca
ahuber21 Dec 20, 2024
658ccc1
add pca
ahuber21 Dec 20, 2024
5e74a54
add incremental linear
ahuber21 Dec 20, 2024
dfbf223
add linear_model
ahuber21 Dec 22, 2024
c4094fb
Merge branch 'dev/ahuber/raw-inputs-dispatching' into enh/raw_inputs
ahuber21 Dec 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions onedal/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"target_offload": "auto",
"allow_fallback_to_host": False,
"allow_sklearn_after_onedal": True,
"use_raw_input": False,
}

_threadlocal = threading.local()
Expand Down
56 changes: 34 additions & 22 deletions onedal/_device_offload.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,30 +180,42 @@ def support_input_format(freefunc=False, queue_param=True):

def decorator(func):
def wrapper_impl(obj, *args, **kwargs):
if len(args) == 0 and len(kwargs) == 0:
if _get_config()["use_raw_input"] is True:
if "queue" not in kwargs:
usm_iface = getattr(args[0], "__sycl_usm_array_interface__", None)
data_queue = (
usm_iface["syclobj"] if usm_iface is not None else data_queue
)
kwargs["queue"] = data_queue
return _run_on_device(func, obj, *args, **kwargs)

elif len(args) == 0 and len(kwargs) == 0:
return _run_on_device(func, obj, *args, **kwargs)
data = (*args, *kwargs.values())
data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs)
if queue_param and not (
"queue" in hostkwargs and hostkwargs["queue"] is not None
):
hostkwargs["queue"] = data_queue
result = _run_on_device(func, obj, *hostargs, **hostkwargs)
usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None)
if usm_iface is not None:
result = _copy_to_usm(data_queue, result)
if dpnp_available and isinstance(data[0], dpnp.ndarray):
result = _convert_to_dpnp(result)

else:
data = (*args, *kwargs.values())
data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs)
if queue_param and not (
"queue" in hostkwargs and hostkwargs["queue"] is not None
):
hostkwargs["queue"] = data_queue
result = _run_on_device(func, obj, *hostargs, **hostkwargs)
usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None)
if usm_iface is not None:
result = _copy_to_usm(data_queue, result)
if dpnp_available and isinstance(data[0], dpnp.ndarray):
result = _convert_to_dpnp(result)
return result
if not get_config().get("transform_output", False):
input_array_api = getattr(
data[0], "__array_namespace__", lambda: None
)()
if input_array_api:
input_array_api_device = data[0].device
result = _asarray(
result, input_array_api, device=input_array_api_device
)
return result
config = get_config()
if not ("transform_output" in config and config["transform_output"]):
input_array_api = getattr(data[0], "__array_namespace__", lambda: None)()
if input_array_api:
input_array_api_device = data[0].device
result = _asarray(
result, input_array_api, device=input_array_api_device
)
return result

if freefunc:

Expand Down
35 changes: 22 additions & 13 deletions onedal/basic_statistics/basic_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
# limitations under the License.
# ==============================================================================

import warnings
from abc import ABCMeta, abstractmethod

import numpy as np

from .._config import _get_config
from ..common._base import BaseEstimator
from ..datatypes import _convert_to_supported, from_table, to_table
from ..utils import _is_csr
from ..utils._array_api import _get_sycl_namespace
from ..utils.validation import _check_array


Expand Down Expand Up @@ -72,27 +73,35 @@ def __init__(self, result_options="all", algorithm="by_default"):
super().__init__(result_options, algorithm)

def fit(self, data, sample_weight=None, queue=None):
policy = self._get_policy(queue, data, sample_weight)

is_csr = _is_csr(data)

if data is not None and not is_csr:
data = _check_array(data, ensure_2d=False)
if sample_weight is not None:
sample_weight = _check_array(sample_weight, ensure_2d=False)
use_raw_input = _get_config().get("use_raw_input", False) is True

# All data should use the same sycl queue
if use_raw_input and _get_sycl_namespace(data)[0] is not None:
queue = data.sycl_queue

if not use_raw_input:
if data is not None and not is_csr:
data = _check_array(data, ensure_2d=False)
if sample_weight is not None:
sample_weight = _check_array(sample_weight, ensure_2d=False)

# TODO
# use xp for dtype.
policy = self._get_policy(queue, data, sample_weight)
data, sample_weight = _convert_to_supported(policy, data, sample_weight)
is_single_dim = data.ndim == 1
data_table, weights_table = to_table(data, sample_weight)

data_table = to_table(data, sua_iface=_get_sycl_namespace(data)[0])
weights_table = to_table(
sample_weight, sua_iface=_get_sycl_namespace(sample_weight)[0]
)

dtype = data.dtype
raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr)
for opt, raw_value in raw_result.items():
value = from_table(raw_value).ravel()
if is_single_dim:
setattr(self, opt, value[0])
else:
setattr(self, opt, value)
setattr(self, opt, value[0]) if data.ndim == 1 else setattr(self, opt, value)

return self

Expand Down
51 changes: 36 additions & 15 deletions onedal/basic_statistics/incremental_basic_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@

from daal4py.sklearn._utils import get_dtype

from .._config import _get_config
from ..datatypes import _convert_to_supported, from_table, to_table
from ..utils import _check_array
from ..utils._array_api import _get_sycl_namespace
from .basic_statistics import BaseBasicStatistics


Expand Down Expand Up @@ -93,26 +95,39 @@ def partial_fit(self, X, weights=None, queue=None):
self : object
Returns the instance itself.
"""
use_raw_input = _get_config().get("use_raw_input", False) is True
sua_iface, xp, _ = _get_sycl_namespace(X)
# Saving input array namespace and sua_iface, that will be used in
# finalize_fit.
self._input_sua_iface = sua_iface
self._input_xp = xp

# All data should use the same sycl queue
if use_raw_input and sua_iface is not None:
queue = X.sycl_queue

self._queue = queue
policy = self._get_policy(queue, X)
X, weights = _convert_to_supported(policy, X, weights)

X = _check_array(
X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False
)
if weights is not None:
weights = _check_array(
weights,
dtype=[np.float64, np.float32],
ensure_2d=False,
force_all_finite=False,
if not use_raw_input:
X = _check_array(
X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False
)
if weights is not None:
weights = _check_array(
weights,
dtype=[np.float64, np.float32],
ensure_2d=False,
force_all_finite=False,
)

if not hasattr(self, "_onedal_params"):
dtype = get_dtype(X)
self._onedal_params = self._get_onedal_params(False, dtype=dtype)

X_table, weights_table = to_table(X, weights)
X_table = to_table(X, sua_iface=sua_iface)
weights_table = to_table(weights, sua_iface=_get_sycl_namespace(weights)[0])
self._partial_result = self._get_backend(
"basic_statistics",
None,
Expand Down Expand Up @@ -140,10 +155,8 @@ def finalize_fit(self, queue=None):
Returns the instance itself.
"""

if queue is not None:
policy = self._get_policy(queue)
else:
policy = self._get_policy(self._queue)
queue = queue if queue is not None else self._queue
policy = self._get_policy(queue)

result = self._get_backend(
"basic_statistics",
Expand All @@ -155,6 +168,14 @@ def finalize_fit(self, queue=None):
)
options = self._get_result_options(self.options).split("|")
for opt in options:
setattr(self, opt, from_table(getattr(result, opt)).ravel())
opt_value = self._input_xp.ravel(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When running sklearnex example incremental_basic_statistics_dpctl.py leads to AttributeError: 'NoneType' object has no attribute 'ravel'

from_table(
getattr(result, opt),
sua_iface=self._input_sua_iface,
sycl_queue=queue,
xp=self._input_xp,
)
)
setattr(self, opt, opt_value)

return self
45 changes: 35 additions & 10 deletions onedal/cluster/dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@

from daal4py.sklearn._utils import get_dtype, make2d

from .._config import _get_config
from ..common._base import BaseEstimator
from ..common._mixin import ClusterMixin
from ..datatypes import _convert_to_supported, from_table, to_table
from ..utils import _check_array
from ..utils._array_api import _get_sycl_namespace


class BaseDBSCAN(BaseEstimator, ClusterMixin):
Expand Down Expand Up @@ -57,27 +59,50 @@ def _get_onedal_params(self, dtype=np.float32):
}

def _fit(self, X, y, sample_weight, module, queue):
use_raw_input = _get_config().get("use_raw_input", False) is True
sua_iface, xp, _ = _get_sycl_namespace(X)

# All data should use the same sycl queue
if use_raw_input and sua_iface is not None:
queue = X.sycl_queue

policy = self._get_policy(queue, X)
X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
sample_weight = make2d(sample_weight) if sample_weight is not None else None
X = make2d(X)

if not use_raw_input:
X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
sample_weight = make2d(sample_weight) if sample_weight is not None else None
X = make2d(X)

types = [np.float32, np.float64]
if get_dtype(X) not in types:
X = X.astype(np.float64)
X = _convert_to_supported(policy, X)
dtype = get_dtype(X)
params = self._get_onedal_params(dtype)
result = module.compute(policy, params, to_table(X), to_table(sample_weight))

self.labels_ = from_table(result.responses).ravel()
X_table = to_table(X, sua_iface=sua_iface)
weights_table = to_table(
sample_weight, sua_iface=_get_sycl_namespace(sample_weight)[0]
)

result = module.compute(policy, params, X_table, weights_table)

self.labels_ = xp.reshape(
from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1
)
if result.core_observation_indices is not None:
self.core_sample_indices_ = from_table(
result.core_observation_indices
).ravel()
self.core_sample_indices_ = xp.reshape(
from_table(
result.core_observation_indices,
sua_iface=sua_iface,
sycl_queue=queue,
xp=xp,
),
-1,
)
else:
self.core_sample_indices_ = np.array([], dtype=np.intc)
self.components_ = np.take(X, self.core_sample_indices_, axis=0)
self.core_sample_indices_ = xp.array([], dtype=xp.int32)
self.components_ = xp.take(X, self.core_sample_indices_, axis=0)
self.n_features_in_ = X.shape[1]
return self

Expand Down
37 changes: 27 additions & 10 deletions onedal/cluster/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.utils import check_random_state

from .._config import _get_config
from ..common._base import BaseEstimator as onedal_BaseEstimator
from ..common._mixin import ClusterMixin, TransformerMixin
from ..datatypes import _convert_to_supported, from_table, to_table
from ..utils import _check_array, _is_arraylike_not_scalar, _is_csr
from ..utils._array_api import _get_sycl_namespace


class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
Expand Down Expand Up @@ -80,7 +82,7 @@ def _get_kmeans_init(self, cluster_count, seed, algorithm):
def _get_basic_statistics_backend(self, result_options):
return BasicStatistics(result_options)

def _tolerance(self, X_table, rtol, is_csr, policy, dtype):
def _tolerance(self, X_table, rtol, is_csr, policy, dtype, sua_iface):
"""Compute absolute tolerance from the relative tolerance"""
if rtol == 0.0:
return rtol
Expand All @@ -94,7 +96,7 @@ def _tolerance(self, X_table, rtol, is_csr, policy, dtype):
return mean_var * rtol

def _check_params_vs_input(
self, X_table, is_csr, policy, default_n_init=10, dtype=np.float32
self, X_table, is_csr, policy, default_n_init=10, dtype=np.float32, sua_iface=None
):
# n_clusters
if X_table.shape[0] < self.n_clusters:
Expand All @@ -103,7 +105,7 @@ def _check_params_vs_input(
)

# tol
self._tol = self._tolerance(X_table, self.tol, is_csr, policy, dtype)
self._tol = self._tolerance(X_table, self.tol, is_csr, policy, dtype, sua_iface)

# n-init
# TODO(1.4): Remove
Expand Down Expand Up @@ -261,18 +263,33 @@ def _fit_backend(
)

def _fit(self, X, module, queue=None):
policy = self._get_policy(queue, X)
is_csr = _is_csr(X)
X = _check_array(
X, dtype=[np.float64, np.float32], accept_sparse="csr", force_all_finite=False
)

use_raw_input = _get_config().get("use_raw_input") is True
if use_raw_input and _get_sycl_namespace(X)[0] is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if use_raw_input and _get_sycl_namespace(X)[0] is not None:
if use_raw_input and sua_iface is not None:
  • move line 284 above this

queue = X.sycl_queue

if not use_raw_input:
X = _check_array(
X,
dtype=[np.float64, np.float32],
accept_sparse="csr",
force_all_finite=False,
)

policy = self._get_policy(queue, X)

X = _convert_to_supported(policy, X)
dtype = get_dtype(X)
X_table = to_table(X)
sua_iface = _get_sycl_namespace(X)[0]
X_table = to_table(X, sua_iface=sua_iface)

self._check_params_vs_input(X_table, is_csr, policy, dtype=dtype)
self._check_params_vs_input(
X_table, is_csr, policy, dtype=dtype, sua_iface=sua_iface
)

params = self._get_onedal_params(is_csr, dtype)
# not used?
# params = self._get_onedal_params(is_csr, dtype)

self.n_features_in_ = X_table.column_count

Expand Down
Loading
Loading