From 8907d9a376484ec47d591ceea6e6d1fab53bb9b4 Mon Sep 17 00:00:00 2001 From: Lukicheva Polina Date: Mon, 14 Feb 2022 07:27:31 -0600 Subject: [PATCH] Enable Compute Follows Data in Cython in fft, random, linalg Add q_ref as parameter to dpnp_memory_free_c and some fixes Cancel changes in random module Modify cython layer for rfft Skip test_eig on CPU and change test_qr Rollback updates in random Finalize CFD support in fft Finalize CFD support in linalg --- .github/workflows/conda-package.yml | 29 ++- dpnp/backend/kernels/dpnp_krnl_common.cpp | 23 +- dpnp/backend/kernels/dpnp_krnl_fft.cpp | 179 ++++++++------ dpnp/backend/kernels/dpnp_krnl_linalg.cpp | 57 ++++- dpnp/dpnp_algo/dpnp_algo.pxd | 5 +- dpnp/dpnp_iface.py | 7 +- dpnp/fft/dpnp_algo_fft.pyx | 71 +++++- dpnp/fft/dpnp_iface_fft.py | 40 +-- dpnp/linalg/dpnp_algo_linalg.pyx | 274 ++++++++++++++++++--- dpnp/linalg/dpnp_iface_linalg.py | 20 +- tests/skipped_tests.tbl | 169 +++++++++---- tests/skipped_tests_gpu.tbl | 108 ++------ tests/test_fft.py | 123 +++++----- tests/test_linalg.py | 69 ++++-- tests/test_sycl_queue.py | 286 +++++++++++++++++++++- 15 files changed, 1069 insertions(+), 391 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index fb307a54031..3d628bcff85 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -246,16 +246,39 @@ jobs: - name: List installed packages run: conda list - - name: Smoke test + - name: Smoke test without envs + run: python -c "import dpnp, dpctl; dpctl.lsplatform()" + + - name: Smoke test with OCL_ICD_FILENAMES + run: python -c "import dpnp, dpctl; dpctl.lsplatform()" + env: + OCL_ICD_FILENAMES: 'libintelocl.so' + + - name: Smoke test with SYCL_ENABLE_HOST_DEVICE run: python -c "import dpnp, dpctl; dpctl.lsplatform()" + env: + SYCL_ENABLE_HOST_DEVICE: '1' + + - name: Smoke test with both envs + run: python -c "import dpnp, dpctl; dpctl.lsplatform()" + env: + OCL_ICD_FILENAMES: 
'libintelocl.so' + SYCL_ENABLE_HOST_DEVICE: '1' # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests - run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_mathematical.py test_special.py + run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_fft.py test_linalg.py test_mathematical.py test_special.py env: + OCL_ICD_FILENAMES: 'libintelocl.so' SYCL_ENABLE_HOST_DEVICE: '1' working-directory: ${{ env.tests-path }} + - name: Run tests without SYCL_ENABLE_HOST_DEVICE + run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_fft.py test_linalg.py test_mathematical.py test_special.py + env: + OCL_ICD_FILENAMES: 'libintelocl.so' + working-directory: ${{ env.tests-path }} + test_windows: needs: build_windows @@ -426,7 +449,7 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests - run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_mathematical.py test_special.py + run: python -m pytest -q -ra --disable-warnings -vv test_arraycreation.py test_dparray.py test_fft.py test_linalg.py test_mathematical.py test_special.py working-directory: ${{ env.tests-path }} upload_linux: diff --git a/dpnp/backend/kernels/dpnp_krnl_common.cpp b/dpnp/backend/kernels/dpnp_krnl_common.cpp index 7ae9127041a..541b34d4fbc 100644 --- a/dpnp/backend/kernels/dpnp_krnl_common.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_common.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2022, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -97,6 +97,7 @@ void dpnp_astype_c(const void* array1_in, void* result1, const size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -477,6 +478,7 @@ void dpnp_dot_c(void* result_out, input2_strides, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -614,6 +616,7 @@ void dpnp_eig_c(const void* array_in, void* result1, void* result2, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -707,6 +710,7 @@ void dpnp_eigvals_c(const void* array_in, void* result1, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -774,7 +778,7 @@ void dpnp_initval_c(void* result1, void* value, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); - + DPCTLEvent_Delete(event_ref); } template @@ -941,6 +945,7 @@ void dpnp_matmul_c(void* result_out, input2_strides, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -1112,11 +1117,25 @@ void func_map_init_linalg(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_EIG][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eig_default_c}; fmap[DPNPFuncName::DPNP_FN_EIG][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eig_default_c}; + fmap[DPNPFuncName::DPNP_FN_EIG_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_eig_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIG_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_eig_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIG_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eig_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIG_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eig_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_eigvals_default_c}; fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_LNG][eft_LNG] = {eft_DBL, 
(void*)dpnp_eigvals_default_c}; fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eigvals_default_c}; fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eigvals_default_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS_EXT][eft_INT][eft_INT] = {eft_DBL, + (void*)dpnp_eigvals_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS_EXT][eft_LNG][eft_LNG] = {eft_DBL, + (void*)dpnp_eigvals_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS_EXT][eft_FLT][eft_FLT] = {eft_FLT, + (void*)dpnp_eigvals_ext_c}; + fmap[DPNPFuncName::DPNP_FN_EIGVALS_EXT][eft_DBL][eft_DBL] = {eft_DBL, + (void*)dpnp_eigvals_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_initval_default_c}; fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_initval_default_c}; fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_initval_default_c}; diff --git a/dpnp/backend/kernels/dpnp_krnl_fft.cpp b/dpnp/backend/kernels/dpnp_krnl_fft.cpp index 23f61201272..3d39f2f373c 100644 --- a/dpnp/backend/kernels/dpnp_krnl_fft.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_fft.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2022, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -48,17 +48,17 @@ template -void dpnp_fft_fft_sycl_c(DPCTLSyclQueueRef q_ref, - const void* array1_in, - void* result_out, - const shape_elem_type* input_shape, - const shape_elem_type* output_shape, - size_t shape_size, - const size_t result_size, - const size_t input_size, - long axis, - long input_boundarie, - size_t inverse) +static void dpnp_fft_fft_sycl_c(DPCTLSyclQueueRef q_ref, + const void* array1_in, + void* result_out, + const shape_elem_type* input_shape, + const shape_elem_type* output_shape, + size_t shape_size, + const size_t result_size, + const size_t input_size, + long axis, + long input_boundarie, + size_t inverse) { if (!(input_size && result_size && shape_size)) { @@ -71,9 +71,8 @@ void dpnp_fft_fft_sycl_c(DPCTLSyclQueueRef q_ref, sycl::queue queue = *(reinterpret_cast(q_ref)); - DPNPC_ptr_adapter<_DataType_input> input1_ptr(q_ref, array1_in, input_size); - const _DataType_input* array_1 = input1_ptr.get_ptr(); - _DataType_output* result = reinterpret_cast<_DataType_output*>(result_out); + _DataType_input* array_1 = static_cast<_DataType_input *>(const_cast(array1_in)); + _DataType_output* result = static_cast<_DataType_output *>(result_out); // kernel specific temporal data shape_elem_type* output_shape_offsets = @@ -171,29 +170,28 @@ void dpnp_fft_fft_sycl_c(DPCTLSyclQueueRef q_ref, } template -void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref, - const void* array1_in, - void* result_out, - const shape_elem_type* input_shape, - const shape_elem_type*, - const size_t shape_size, - const size_t input_size, - const size_t result_size, - _Descriptor_type& desc, - size_t inverse, - const size_t norm) +static void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref, + const void* array1_in, + void* result_out, + const shape_elem_type* input_shape, + const shape_elem_type* result_shape, + const size_t shape_size, + const size_t input_size, + const 
size_t result_size, + _Descriptor_type& desc, + size_t inverse, + const size_t norm) { - if (!shape_size) - { + (void)result_shape; + + if (!shape_size) { return; } sycl::queue queue = *(reinterpret_cast(q_ref)); - DPNPC_ptr_adapter<_DataType_input> input1_ptr(q_ref, array1_in, input_size); - DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result_out, result_size); - _DataType_input* array_1 = input1_ptr.get_ptr(); - _DataType_output* result = result_ptr.get_ptr(); + _DataType_input* array_1 = static_cast<_DataType_input *>(const_cast(array1_in)); + _DataType_output* result = static_cast<_DataType_output *>(result_out); const size_t n_iter = std::accumulate(input_shape, input_shape + shape_size - 1, 1, std::multiplies()); @@ -242,31 +240,29 @@ template -void dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, - const void* array1_in, - void* result_out, - const shape_elem_type* input_shape, - const shape_elem_type* result_shape, - const size_t shape_size, - const size_t input_size, - const size_t result_size, - _Descriptor_type& desc, - size_t inverse, - const size_t norm, - const size_t real) +static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, + const void* array1_in, + void* result_out, + const shape_elem_type* input_shape, + const shape_elem_type* result_shape, + const size_t shape_size, + const size_t input_size, + const size_t result_size, + _Descriptor_type& desc, + size_t inverse, + const size_t norm, + const size_t real) { - if (!shape_size) - { - return; + DPCTLSyclEventRef event_ref = nullptr; + if (!shape_size) { + return event_ref; } - DPNPC_ptr_adapter<_DataType_input> input1_ptr(q_ref, array1_in, input_size); - DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result_out, result_size * 2, true, true); - _DataType_input* array_1 = input1_ptr.get_ptr(); - _DataType_output* result = result_ptr.get_ptr(); - sycl::queue queue = *(reinterpret_cast(q_ref)); + _DataType_input* array_1 = 
static_cast<_DataType_input *>(const_cast(array1_in)); + _DataType_output* result = static_cast<_DataType_output *>(result_out); + const size_t n_iter = std::accumulate(input_shape, input_shape + shape_size - 1, 1, std::multiplies()); @@ -308,7 +304,7 @@ void dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, sycl::event::wait(fft_events); if (real) { // the output size of the rfft function is input_size/2 + 1 so we don't need to fill the second half of the output - return; + return event_ref; } size_t n_conj = result_shift % 2 == 0 ? result_shift / 2 - 1 : result_shift / 2; @@ -322,7 +318,8 @@ void dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, { size_t j = global_id[1]; { - *(reinterpret_cast*>(result) + result_shift * (i + 1) - (j + 1)) = std::conj(*(reinterpret_cast*>(result) + result_shift * i + (j + 1))); + *(reinterpret_cast*>(result) + result_shift * (i + 1) - (j + 1)) = + std::conj(*(reinterpret_cast*>(result) + result_shift * i + (j + 1))); } } }; @@ -333,14 +330,18 @@ void dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef q_ref, }; event = queue.submit(kernel_func); - event.wait(); if (inverse) { - event = oneapi::mkl::vm::conj(queue, result_size, reinterpret_cast*>(result), reinterpret_cast*>(result)); event.wait(); + event = oneapi::mkl::vm::conj(queue, + result_size, + reinterpret_cast*>(result), + reinterpret_cast*>(result)); } - return; + event_ref = reinterpret_cast(&event); + + return DPCTLEvent_Copy(event_ref); } template @@ -394,7 +395,7 @@ DPCTLSyclEventRef dpnp_fft_fft_c(DPCTLSyclQueueRef q_ref, { desc_dp_real_t desc(dim); - dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>( q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 0); } /* real-to-complex, single precision */ @@ -402,26 +403,33 @@ DPCTLSyclEventRef 
dpnp_fft_fft_c(DPCTLSyclQueueRef q_ref, std::is_same<_DataType_output, std::complex>::value) { desc_sp_real_t desc(dim); // try: 2 * result_size - dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>( + + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>( q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 0); } else if constexpr (std::is_same<_DataType_input, int32_t>::value || std::is_same<_DataType_input, int64_t>::value) { - double* array1_copy = reinterpret_cast(dpnp_memory_alloc_c(input_size * sizeof(double))); + double* array1_copy = reinterpret_cast(dpnp_memory_alloc_c(q_ref, input_size * sizeof(double))); shape_elem_type* copy_strides = reinterpret_cast(dpnp_memory_alloc_c(q_ref, sizeof(shape_elem_type))); *copy_strides = 1; shape_elem_type* copy_shape = reinterpret_cast(dpnp_memory_alloc_c(q_ref, sizeof(shape_elem_type))); *copy_shape = input_size; shape_elem_type copy_shape_size = 1; - dpnp_copyto_c<_DataType_input, double>(q_ref, array1_copy, input_size, copy_shape_size, copy_shape, copy_strides, - array1_in, input_size, copy_shape_size, copy_shape, copy_strides, NULL, dep_event_vec_ref); + event_ref = dpnp_copyto_c<_DataType_input, double>(q_ref, array1_copy, input_size, copy_shape_size, copy_shape, copy_strides, + array1_in, input_size, copy_shape_size, copy_shape, copy_strides, NULL, dep_event_vec_ref); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); desc_dp_real_t desc(dim); - dpnp_fft_fft_mathlib_real_to_cmplx_c( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c( q_ref, array1_copy, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 0); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); + event_ref = nullptr; + dpnp_memory_free_c(q_ref, array1_copy); dpnp_memory_free_c(q_ref, copy_strides); dpnp_memory_free_c(q_ref, copy_shape); @@ -470,6 
+478,7 @@ void dpnp_fft_fft_c(const void* array1_in, norm, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -524,7 +533,6 @@ DPCTLSyclEventRef dpnp_fft_rfft_c(DPCTLSyclQueueRef q_ref, size_t dim = input_shape[shape_size - 1]; - if constexpr (std::is_same<_DataType_output, std::complex>::value || std::is_same<_DataType_output, std::complex>::value) { @@ -533,7 +541,7 @@ DPCTLSyclEventRef dpnp_fft_rfft_c(DPCTLSyclQueueRef q_ref, { desc_dp_real_t desc(dim); - dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>( q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 1); } /* real-to-complex, single precision */ @@ -541,26 +549,32 @@ DPCTLSyclEventRef dpnp_fft_rfft_c(DPCTLSyclQueueRef q_ref, std::is_same<_DataType_output, std::complex>::value) { desc_sp_real_t desc(dim); // try: 2 * result_size - dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>( q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 1); } else if constexpr (std::is_same<_DataType_input, int32_t>::value || std::is_same<_DataType_input, int64_t>::value) { - double* array1_copy = reinterpret_cast(dpnp_memory_alloc_c(input_size * sizeof(double))); + double* array1_copy = reinterpret_cast(dpnp_memory_alloc_c(q_ref, input_size * sizeof(double))); shape_elem_type* copy_strides = reinterpret_cast(dpnp_memory_alloc_c(q_ref, sizeof(shape_elem_type))); *copy_strides = 1; shape_elem_type* copy_shape = reinterpret_cast(dpnp_memory_alloc_c(q_ref, sizeof(shape_elem_type))); *copy_shape = input_size; shape_elem_type copy_shape_size = 1; - dpnp_copyto_c<_DataType_input, double>(q_ref, array1_copy, input_size, 
copy_shape_size, copy_shape, copy_strides, - array1_in, input_size, copy_shape_size, copy_shape, copy_strides, NULL, dep_event_vec_ref); + event_ref = dpnp_copyto_c<_DataType_input, double>(q_ref, array1_copy, input_size, copy_shape_size, copy_shape, copy_strides, + array1_in, input_size, copy_shape_size, copy_shape, copy_strides, NULL, dep_event_vec_ref); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); desc_dp_real_t desc(dim); - dpnp_fft_fft_mathlib_real_to_cmplx_c( + event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c( q_ref, array1_copy, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 1); + DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); + event_ref = nullptr; + dpnp_memory_free_c(q_ref, array1_copy); dpnp_memory_free_c(q_ref, copy_strides); dpnp_memory_free_c(q_ref, copy_shape); @@ -596,6 +610,7 @@ void dpnp_fft_rfft_c(const void* array1_in, norm, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -636,6 +651,20 @@ void func_map_init_fft_func(func_map_t& fmap) eft_C64, (void*)dpnp_fft_fft_default_c, std::complex>}; fmap[DPNPFuncName::DPNP_FN_FFT_FFT][eft_C128][eft_C128] = { eft_C128, (void*)dpnp_fft_fft_default_c, std::complex>}; + + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_INT][eft_INT] = { + eft_C128, (void*)dpnp_fft_fft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_LNG][eft_LNG] = { + eft_C128, (void*)dpnp_fft_fft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_FLT][eft_FLT] = { + eft_C64, (void*)dpnp_fft_fft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_DBL][eft_DBL] = { + eft_C128, (void*)dpnp_fft_fft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_C64][eft_C64] = { + eft_C64, (void*)dpnp_fft_fft_ext_c, std::complex>}; + fmap[DPNPFuncName::DPNP_FN_FFT_FFT_EXT][eft_C128][eft_C128] = { + eft_C128, (void*)dpnp_fft_fft_ext_c, std::complex>}; + 
fmap[DPNPFuncName::DPNP_FN_FFT_RFFT][eft_INT][eft_INT] = { eft_C128, (void*)dpnp_fft_rfft_default_c>}; fmap[DPNPFuncName::DPNP_FN_FFT_RFFT][eft_LNG][eft_LNG] = { @@ -644,5 +673,15 @@ void func_map_init_fft_func(func_map_t& fmap) eft_C64, (void*)dpnp_fft_rfft_default_c>}; fmap[DPNPFuncName::DPNP_FN_FFT_RFFT][eft_DBL][eft_DBL] = { eft_C128, (void*)dpnp_fft_rfft_default_c>}; + + fmap[DPNPFuncName::DPNP_FN_FFT_RFFT_EXT][eft_INT][eft_INT] = { + eft_C128, (void*)dpnp_fft_rfft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_RFFT_EXT][eft_LNG][eft_LNG] = { + eft_C128, (void*)dpnp_fft_rfft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_RFFT_EXT][eft_FLT][eft_FLT] = { + eft_C64, (void*)dpnp_fft_rfft_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_FFT_RFFT_EXT][eft_DBL][eft_DBL] = { + eft_C128, (void*)dpnp_fft_rfft_ext_c>}; + return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp index 77bdad0c6b1..dff1320d5c2 100644 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2022, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -122,6 +122,7 @@ void dpnp_cholesky_c(void* array1_in, void* result1, const size_t size, const si data_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -263,6 +264,7 @@ void dpnp_det_c(void* array1_in, void* result1, shape_elem_type* shape, size_t n ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -405,6 +407,7 @@ void dpnp_inv_c(void* array1_in, void* result1, shape_elem_type* shape, size_t n ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -528,6 +531,7 @@ void dpnp_kron_c(void* array1_in, ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -616,6 +620,7 @@ void dpnp_matrix_rank_c(void* array1_in, void* result1, shape_elem_type* shape, ndim, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -753,6 +758,7 @@ void dpnp_qr_c(void* array1_in, void* result1, void* result2, void* result3, siz size_n, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -852,6 +858,7 @@ void dpnp_svd_c(void* array1_in, void* result1, void* result2, void* result3, si size_n, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -872,16 +879,29 @@ void func_map_init_linalg_func(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_CHOLESKY][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cholesky_default_c}; fmap[DPNPFuncName::DPNP_FN_CHOLESKY][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cholesky_default_c}; + fmap[DPNPFuncName::DPNP_FN_CHOLESKY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cholesky_ext_c}; + fmap[DPNPFuncName::DPNP_FN_CHOLESKY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cholesky_ext_c}; + 
fmap[DPNPFuncName::DPNP_FN_DET][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_det_default_c}; fmap[DPNPFuncName::DPNP_FN_DET][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_det_default_c}; fmap[DPNPFuncName::DPNP_FN_DET][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_det_default_c}; fmap[DPNPFuncName::DPNP_FN_DET][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_det_default_c}; + fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_det_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_det_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_det_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_det_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INV][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_inv_default_c}; fmap[DPNPFuncName::DPNP_FN_INV][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_inv_default_c}; fmap[DPNPFuncName::DPNP_FN_INV][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_inv_default_c}; fmap[DPNPFuncName::DPNP_FN_INV][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_inv_default_c}; + fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_inv_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_inv_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_inv_ext_c}; + fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_inv_ext_c}; + fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_kron_default_c}; fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_LNG] = {eft_LNG, @@ -989,6 +1009,11 @@ void func_map_init_linalg_func(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_matrix_rank_default_c}; fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_matrix_rank_default_c}; + fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK_EXT][eft_INT][eft_INT] = {eft_INT, 
(void*)dpnp_matrix_rank_ext_c}; + fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_matrix_rank_ext_c}; + fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_matrix_rank_ext_c}; + fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_matrix_rank_ext_c}; + fmap[DPNPFuncName::DPNP_FN_QR][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_qr_default_c}; fmap[DPNPFuncName::DPNP_FN_QR][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_qr_default_c}; fmap[DPNPFuncName::DPNP_FN_QR][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_qr_default_c}; @@ -996,12 +1021,34 @@ void func_map_init_linalg_func(func_map_t& fmap) // fmap[DPNPFuncName::DPNP_FN_QR][eft_C128][eft_C128] = { // eft_C128, (void*)dpnp_qr_c, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_svd_default_c}; + fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_qr_ext_c}; + fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_qr_ext_c}; + fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_qr_ext_c}; + fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_qr_ext_c}; + // fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_C128][eft_C128] = { + // eft_C128, (void*)dpnp_qr_c, std::complex>}; + + fmap[DPNPFuncName::DPNP_FN_SVD][eft_INT][eft_INT] = {eft_DBL, + (void*)dpnp_svd_default_c}; + fmap[DPNPFuncName::DPNP_FN_SVD][eft_LNG][eft_LNG] = {eft_DBL, + (void*)dpnp_svd_default_c}; + fmap[DPNPFuncName::DPNP_FN_SVD][eft_FLT][eft_FLT] = {eft_FLT, + (void*)dpnp_svd_default_c}; + fmap[DPNPFuncName::DPNP_FN_SVD][eft_DBL][eft_DBL] = {eft_DBL, + 
(void*)dpnp_svd_default_c}; fmap[DPNPFuncName::DPNP_FN_SVD][eft_C128][eft_C128] = { eft_C128, (void*)dpnp_svd_default_c, std::complex, double>}; + + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_INT][eft_INT] = {eft_DBL, + (void*)dpnp_svd_ext_c}; + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_LNG][eft_LNG] = {eft_DBL, + (void*)dpnp_svd_ext_c}; + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_FLT][eft_FLT] = {eft_FLT, + (void*)dpnp_svd_ext_c}; + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_DBL][eft_DBL] = {eft_DBL, + (void*)dpnp_svd_ext_c}; + fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_C128][eft_C128] = { + eft_C128, (void*)dpnp_svd_ext_c, std::complex, double>}; return; } diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 2d3f1a7870a..e604a71f449 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -26,8 +26,6 @@ # ***************************************************************************** cimport dpctl as c_dpctl -cimport dpctl as c_dpctl - from libcpp cimport bool as cpp_bool from dpnp.dpnp_utils.dpnp_algo_utils cimport dpnp_descriptor @@ -145,6 +143,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_FFT_FFT DPNP_FN_FFT_FFT_EXT DPNP_FN_FFT_RFFT + DPNP_FN_FFT_RFFT_EXT DPNP_FN_FILL_DIAGONAL DPNP_FN_FILL_DIAGONAL_EXT DPNP_FN_FLATTEN diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index d0fccd6fcae..c3d80c25033 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -219,7 +219,12 @@ def get_dpnp_descriptor(ext_obj, copy_when_strides=True, copy_when_nondefault_qu ext_obj_offset = 0 if ext_obj.strides != shape_offsets or ext_obj_offset != 0: - ext_obj = array(ext_obj) + # DPCTL will create a copy of array with default sycl queue + # if both sycl_queue and device parameters are None. + # While it is required to create the copy on the same device + # to be compliant with compute follows data approach. + arr_obj = unwrap_array(ext_obj) + ext_obj = array(ext_obj, sycl_queue = getattr(arr_obj, "sycl_queue", None)) # while dpnp functions are based on DPNP_QUEUE # we need to create a copy on device associated with DPNP_QUEUE diff --git a/dpnp/fft/dpnp_algo_fft.pyx b/dpnp/fft/dpnp_algo_fft.pyx index d63c7bf9fc6..393c744d4f3 100644 --- a/dpnp/fft/dpnp_algo_fft.pyx +++ b/dpnp/fft/dpnp_algo_fft.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -42,8 +42,9 @@ __all__ = [ "dpnp_rfft" ] -ctypedef void(*fptr_dpnp_fft_fft_t)(void *, void * , shape_elem_type * , shape_elem_type * , - size_t, long, long, size_t, size_t) +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_fft_fft_t)(c_dpctl.DPCTLSyclQueueRef, void *, void * , + shape_elem_type * , shape_elem_type * , size_t, long, + long, size_t, size_t, const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_fft(utils.dpnp_descriptor input, @@ -63,15 +64,39 @@ cpdef utils.dpnp_descriptor dpnp_fft(utils.dpnp_descriptor input, cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FFT_FFT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FFT_FFT_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_dpnp_fft_fft_t func = kernel_data.ptr # call FPTR function - func(input.get_data(), result.get_data(), input_shape.data(), - output_shape.data(), input_shape.size(), axis_norm, input_boundarie, inverse, norm) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + output_shape.data(), + input_shape.size(), + axis_norm, + input_boundarie, + inverse, + norm, + NULL) # dep_events_ref + + with nogil: 
c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -93,14 +118,38 @@ cpdef utils.dpnp_descriptor dpnp_rfft(utils.dpnp_descriptor input, cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FFT_RFFT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_FFT_RFFT_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(output_shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef fptr_dpnp_fft_fft_t func = kernel_data.ptr # call FPTR function - func(input.get_data(), result.get_data(), input_shape.data(), - output_shape.data(), input_shape.size(), axis_norm, input_boundarie, inverse, norm) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + output_shape.data(), + input_shape.size(), + axis_norm, + input_boundarie, + inverse, + norm, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/fft/dpnp_iface_fft.py b/dpnp/fft/dpnp_iface_fft.py index 952a9c72a8b..986dfaa8c61 100644 --- a/dpnp/fft/dpnp_iface_fft.py +++ b/dpnp/fft/dpnp_iface_fft.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel 
Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -100,7 +100,7 @@ def fft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: norm_ = get_validated_norm(norm) @@ -144,7 +144,7 @@ def fft2(x1, s=None, axes=(-2, -1), norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if norm is not None: pass @@ -185,7 +185,7 @@ def fftn(x1, s=None, axes=None, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if s is None: boundaries = tuple([x1_desc.shape[i] for i in range(x1_desc.ndim)]) @@ -231,7 +231,7 @@ def fftshift(x1, axes=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_= Norm.backward @@ -263,7 +263,7 @@ def hfft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_ = get_validated_norm(norm) @@ -305,7 +305,7 @@ def ifft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: norm_ = get_validated_norm(norm) @@ -348,7 +348,7 @@ def ifft2(x1, s=None, axes=(-2, -1), norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if norm is not None: pass @@ -372,7 +372,7 @@ def ifftshift(x1, axes=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_ = Norm.backward @@ -385,7 
+385,7 @@ def ifftshift(x1, axes=None): if x1_desc.size < 1: pass # let fallback to handle exception else: - return dpnp_fft(x1_desc, input_boundarie, output_boundarie, axis_param, False, norm_.value).get_pyobj() + return dpnp_fft(x1_desc, input_boundarie, output_boundarie, axis_param, True, norm_.value).get_pyobj() return call_origin(numpy.fft.ifftshift, x1, axes) @@ -406,7 +406,7 @@ def ifftn(x1, s=None, axes=None, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: if s is None: boundaries = tuple([x1_desc.shape[i] for i in range(x1_desc.ndim)]) @@ -453,7 +453,7 @@ def ihfft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_ = get_validated_norm(norm) @@ -478,7 +478,7 @@ def ihfft(x1, n=None, axis=-1, norm=None): else: output_boundarie = input_boundarie - return dpnp_fft(x1_desc, input_boundarie, output_boundarie, axis_param, False, norm_.value).get_pyobj() + return dpnp_fft(x1_desc, input_boundarie, output_boundarie, axis_param, True, norm_.value).get_pyobj() return call_origin(numpy.fft.ihfft, x1, n, axis, norm) @@ -497,7 +497,7 @@ def irfft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: norm_ = get_validated_norm(norm) @@ -548,7 +548,7 @@ def irfft2(x1, s=None, axes=(-2, -1), norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if norm is not None: pass @@ -574,7 +574,7 @@ def irfftn(x1, s=None, axes=None, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: if s is None: boundaries = tuple([x1_desc.shape[i] for i in 
range(x1_desc.ndim)]) @@ -621,7 +621,7 @@ def rfft(x1, n=None, axis=-1, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: norm_ = get_validated_norm(norm) @@ -670,7 +670,7 @@ def rfft2(x1, s=None, axes=(-2, -1), norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if norm is not None: pass @@ -711,7 +711,7 @@ def rfftn(x1, s=None, axes=None, norm=None): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc and 0: if s is None: boundaries = tuple([x1_desc.shape[i] for i in range(x1_desc.ndim)]) @@ -738,7 +738,7 @@ def rfftn(x1, s=None, axes=None, norm=None): except IndexError: checker_throw_axis_error("fft.rfftn", "is out of bounds", param_axis, f"< {len(boundaries)}") - x1_iter_desc = dpnp.get_dpnp_descriptor(x1_iter) + x1_iter_desc = dpnp.get_dpnp_descriptor(x1_iter, copy_when_nondefault_queue=False) x1_iter = rfft(x1_iter_desc.get_pyobj(), n=param_n, axis=param_axis, norm=norm) return x1_iter diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx index 04efad5c600..e6b239eb880 100644 --- a/dpnp/linalg/dpnp_algo_linalg.pyx +++ b/dpnp/linalg/dpnp_algo_linalg.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2022, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -56,12 +56,24 @@ __all__ = [ # C function pointer to the C library template functions -ctypedef void(*custom_linalg_1in_1out_func_ptr_t)(void *, void * , shape_elem_type * , size_t) -ctypedef void(*custom_linalg_1in_1out_func_ptr_t_)(void * , void * , size_t * ) -ctypedef void(*custom_linalg_1in_1out_with_size_func_ptr_t_)(void *, void * , size_t) -ctypedef void(*custom_linalg_1in_1out_with_2size_func_ptr_t_)(void *, void * , size_t, size_t) -ctypedef void(*custom_linalg_1in_3out_shape_t)(void *, void * , void * , void * , size_t , size_t ) -ctypedef void(*custom_linalg_2in_1out_func_ptr_t)(void *, void * , void * , size_t ) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * ,shape_elem_type * , + size_t, const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef, + void * , void * , size_t * , + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_with_size_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_with_2size_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , size_t, size_t, + const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_3out_shape_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , void * , void * , + size_t , size_t, const c_dpctl.DPCTLEventVectorRef) +ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_2in_1out_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, + void *, void * , void * , size_t, + const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_cholesky(utils.dpnp_descriptor input_): @@ -69,14 +81,34 @@ cpdef utils.dpnp_descriptor dpnp_cholesky(utils.dpnp_descriptor input_): cdef DPNPFuncType param1_type = 
dpnp_dtype_to_DPNPFuncType(input_.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOLESKY, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOLESKY_EXT, param1_type, param1_type) + + input_obj = input_.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_.shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_.shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_with_2size_func_ptr_t_ func = kernel_data.ptr - func(input_.get_data(), result.get_data(), input_.size, size_) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input_.get_data(), + result.get_data(), + input_.size, + size_, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -119,14 +151,34 @@ cpdef utils.dpnp_descriptor dpnp_det(utils.dpnp_descriptor input): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DET, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DET_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + 
result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_func_ptr_t func = kernel_data.ptr - func(input.get_data(), result.get_data(), input_shape.data(), input.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + input.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -137,16 +189,41 @@ cpdef tuple dpnp_eig(utils.dpnp_descriptor x1): cdef size_t size = 0 if x1_shape.empty() else x1_shape.front() cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EIG, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EIG_EXT, param1_type, param1_type) result_type = dpnp_DPNPFuncType_to_dtype(< size_t > kernel_data.return_type) - cdef utils.dpnp_descriptor res_val = utils.create_output_descriptor((size,), kernel_data.return_type, None) - cdef utils.dpnp_descriptor res_vec = utils.create_output_descriptor(x1_shape, kernel_data.return_type, None) + x1_obj = x1.get_array() + + cdef utils.dpnp_descriptor res_val = utils.create_output_descriptor((size,), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor res_vec = utils.create_output_descriptor(x1_shape, + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = res_val.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_2in_1out_func_ptr_t func = kernel_data.ptr # call FPTR function - func(x1.get_data(), res_val.get_data(), 
res_vec.get_data(), size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + res_val.get_data(), + res_vec.get_data(), + size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return (res_val.get_pyobj(), res_vec.get_pyobj()) @@ -157,14 +234,33 @@ cpdef utils.dpnp_descriptor dpnp_eigvals(utils.dpnp_descriptor input): cdef size_t size = 0 if input_shape.empty() else input_shape.front() cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EIGVALS, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EIGVALS_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor res_val = utils.create_output_descriptor((size,), kernel_data.return_type, None) + cdef utils.dpnp_descriptor res_val = utils.create_output_descriptor((size,), + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = res_val.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_with_size_func_ptr_t_ func = kernel_data.ptr # call FPTR function - func(input.get_data(), res_val.get_data(), size) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + res_val.get_data(), + size, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return res_val @@ -174,14 +270,34 @@ cpdef utils.dpnp_descriptor dpnp_inv(utils.dpnp_descriptor input): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_INV, param1_type, param1_type) + cdef DPNPFuncData 
kernel_data = get_dpnp_function_ptr(DPNP_FN_INV_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_shape, kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_shape, + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_func_ptr_t func = kernel_data.ptr - func(input.get_data(), result.get_data(), input_shape.data(), input.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + input.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -190,14 +306,34 @@ cpdef utils.dpnp_descriptor dpnp_matrix_rank(utils.dpnp_descriptor input): cdef shape_type_c input_shape = input.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MATRIX_RANK, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MATRIX_RANK_EXT, param1_type, param1_type) + + input_obj = input.get_array() # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), kernel_data.return_type, None) + cdef utils.dpnp_descriptor result = utils.create_output_descriptor((1,), + kernel_data.return_type, + None, + device=input_obj.sycl_device, + usm_type=input_obj.usm_type, + sycl_queue=input_obj.sycl_queue) + + result_sycl_queue = result.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef 
c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_1out_func_ptr_t func = kernel_data.ptr - func(input.get_data(), result.get_data(), input_shape.data(), input.ndim) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + input.get_data(), + result.get_data(), + input_shape.data(), + input.ndim, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return result @@ -312,15 +448,47 @@ cpdef tuple dpnp_qr(utils.dpnp_descriptor x1, str mode): cdef size_t size_tau = min_m_n cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_QR, param1_type, param1_type) - - cdef utils.dpnp_descriptor res_q = utils.create_output_descriptor((size_m, min_m_n), kernel_data.return_type, None) - cdef utils.dpnp_descriptor res_r = utils.create_output_descriptor((min_m_n, size_n), kernel_data.return_type, None) - cdef utils.dpnp_descriptor tau = utils.create_output_descriptor((size_tau, ), kernel_data.return_type, None) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_QR_EXT, param1_type, param1_type) + + x1_obj = x1.get_array() + + cdef utils.dpnp_descriptor res_q = utils.create_output_descriptor((size_m, min_m_n), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor res_r = utils.create_output_descriptor((min_m_n, size_n), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor tau = utils.create_output_descriptor((size_tau, ), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = res_q.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() 
cdef custom_linalg_1in_3out_shape_t func = < custom_linalg_1in_3out_shape_t > kernel_data.ptr - func(x1.get_data(), res_q.get_data(), res_r.get_data(), tau.get_data(), size_m, size_n) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + res_q.get_data(), + res_r.get_data(), + tau.get_data(), + size_m, + size_n, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return (res_q.get_pyobj(), res_r.get_pyobj()) @@ -331,18 +499,50 @@ cpdef tuple dpnp_svd(utils.dpnp_descriptor x1, cpp_bool full_matrices, cpp_bool cdef size_t size_s = min(size_m, size_n) cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SVD, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SVD_EXT, param1_type, param1_type) cdef DPNPFuncType type_s = DPNP_FT_DOUBLE if x1.dtype == dpnp.float32: type_s = DPNP_FT_FLOAT - cdef utils.dpnp_descriptor res_u = utils.create_output_descriptor((size_m, size_m), kernel_data.return_type, None) - cdef utils.dpnp_descriptor res_s = utils.create_output_descriptor((size_s, ), type_s, None) - cdef utils.dpnp_descriptor res_vt = utils.create_output_descriptor((size_n, size_n), kernel_data.return_type, None) + x1_obj = x1.get_array() + + cdef utils.dpnp_descriptor res_u = utils.create_output_descriptor((size_m, size_m), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor res_s = utils.create_output_descriptor((size_s, ), + type_s, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + cdef utils.dpnp_descriptor res_vt = utils.create_output_descriptor((size_n, size_n), + kernel_data.return_type, + None, + device=x1_obj.sycl_device, + usm_type=x1_obj.usm_type, + sycl_queue=x1_obj.sycl_queue) + + result_sycl_queue = 
res_u.get_array().sycl_queue + + cdef c_dpctl.SyclQueue q = result_sycl_queue + cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() cdef custom_linalg_1in_3out_shape_t func = < custom_linalg_1in_3out_shape_t > kernel_data.ptr - func(x1.get_data(), res_u.get_data(), res_s.get_data(), res_vt.get_data(), size_m, size_n) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, + x1.get_data(), + res_u.get_data(), + res_s.get_data(), + res_vt.get_data(), + size_m, + size_n, + NULL) # dep_events_ref + + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + c_dpctl.DPCTLEvent_Delete(event_ref) return (res_u.get_pyobj(), res_s.get_pyobj(), res_vt.get_pyobj()) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 2db35c5d2ac..43a26c1b530 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -88,14 +88,14 @@ def cholesky(input): matrix object if `input` is a matrix object. """ - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.shape[-1] != x1_desc.shape[-2]: pass else: if input.dtype == dpnp.int32 or input.dtype == dpnp.int64: # TODO memory copy. needs to move into DPNPC - input_ = dpnp.get_dpnp_descriptor(dpnp.astype(input, dpnp.float64)) + input_ = dpnp.get_dpnp_descriptor(dpnp.astype(input, dpnp.float64), copy_when_nondefault_queue=False) else: input_ = x1_desc return dpnp_cholesky(input_).get_pyobj() @@ -145,7 +145,7 @@ def det(input): Determinant of `input`. 
""" - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.shape[-1] == x1_desc.shape[-2]: result_obj = dpnp_det(x1_desc).get_pyobj() @@ -164,7 +164,7 @@ def eig(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if (x1_desc.size > 0): return dpnp_eig(x1_desc) @@ -191,7 +191,7 @@ def eigvals(input): real for real matrices. """ - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.size > 0: return dpnp_eigvals(x1_desc).get_pyobj() @@ -213,7 +213,7 @@ def inv(input): Otherwise the function will be executed sequentially on CPU. """ - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if x1_desc.ndim == 2 and x1_desc.shape[0] == x1_desc.shape[1] and x1_desc.shape[0] >= 2: return dpnp_inv(x1_desc).get_pyobj() @@ -277,7 +277,7 @@ def matrix_rank(input, tol=None, hermitian=False): """ - x1_desc = dpnp.get_dpnp_descriptor(input) + x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) if x1_desc: if tol is not None: pass @@ -362,7 +362,7 @@ def norm(x1, ord=None, axis=None, keepdims=False): Norm of the matrix or vector(s). 
""" - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not isinstance(axis, int) and not isinstance(axis, tuple) and axis is not None: pass @@ -395,7 +395,7 @@ def qr(x1, mode='reduced'): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if mode != 'reduced': pass @@ -464,7 +464,7 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): """ - x1_desc = dpnp.get_dpnp_descriptor(x1) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) if x1_desc: if not x1_desc.ndim == 2: pass diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index b781e377202..424c84158f0 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -5,60 +5,29 @@ tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25 tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22] tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-remainder-data15-data25] +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fft2 
tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_18_{axes=None, norm=None, s=None, shape=(0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_18_{axes=None, norm=None, s=None, shape=(0, 5)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_19_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_19_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_20_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_20_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_ifftn + +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 
4)}::test_fftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_17_{axes=(), norm='ortho', s=None, shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_17_{axes=(), norm='ortho', s=None, shape=(2, 3, 4)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_18_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_18_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_ifftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_21_{axes=None, norm=None, s=None, shape=(0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_21_{axes=None, norm=None, s=None, shape=(0, 5)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_22_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_22_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_23_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_23_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 
4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifftn - -tests/test_linalg.py::test_eig_arange[2-float64] -tests/test_linalg.py::test_eig_arange[2-float32] -tests/test_linalg.py::test_eig_arange[2-int64] -tests/test_linalg.py::test_eig_arange[2-int32] -tests/test_linalg.py::test_eig_arange[4-float64] -tests/test_linalg.py::test_eig_arange[4-float32] -tests/test_linalg.py::test_eig_arange[4-int64] -tests/test_linalg.py::test_eig_arange[4-int32] -tests/test_linalg.py::test_eig_arange[8-float64] -tests/test_linalg.py::test_eig_arange[8-float32] -tests/test_linalg.py::test_eig_arange[8-int64] -tests/test_linalg.py::test_eig_arange[8-int32] -tests/test_linalg.py::test_eig_arange[16-float64] -tests/test_linalg.py::test_eig_arange[16-float32] -tests/test_linalg.py::test_eig_arange[16-int64] -tests/test_linalg.py::test_eig_arange[16-int32] -tests/test_linalg.py::test_eig_arange[300-float64] -tests/test_linalg.py::test_eig_arange[300-float32] -tests/test_linalg.py::test_eig_arange[300-int64] -tests/test_linalg.py::test_eig_arange[300-int32] -tests/test_linalg.py::test_eigvals tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory @@ -113,22 +82,120 @@ tests/test_dparray.py::test_astype[[]-complex-int32] tests/test_dparray.py::test_astype[[]-complex-bool] tests/test_dparray.py::test_astype[[]-complex-bool_] tests/test_dparray.py::test_astype[[]-complex-complex] -tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] + +tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[None-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] +tests/test_linalg.py::test_cond[1-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] tests/test_linalg.py::test_cond[1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_cond[-2-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[-1-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] 
+tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[2-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] -tests/test_linalg.py::test_cond[-2-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[2-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] +tests/test_linalg.py::test_cond[-2-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[-2-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] +tests/test_linalg.py::test_cond[numpy.inf-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] +tests/test_linalg.py::test_cond[-numpy.inf-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] +tests/test_linalg.py::test_cond[-numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond["fro"-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] tests/test_linalg.py::test_cond["fro"-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] -tests/test_linalg.py::test_cond[None-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_cond[-numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_cond[numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_svd[(2,2)-complex128] -tests/test_linalg.py::test_svd[(3,4)-complex128] -tests/test_linalg.py::test_svd[(5,3)-complex128] -tests/test_linalg.py::test_svd[(16,16)-complex128] + +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-float64] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-float32] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-int64] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-int32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-float64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-float32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-int64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-int32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-float64] 
+tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-float32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int32] + +tests/test_linalg.py::test_norm1[0-None-[7]] +tests/test_linalg.py::test_norm1[0-None-[1, 2]] +tests/test_linalg.py::test_norm1[0-None-[1, 0]] +tests/test_linalg.py::test_norm1[0-3-[7]] +tests/test_linalg.py::test_norm1[0-3-[1, 2]] +tests/test_linalg.py::test_norm1[0-3-[1, 0]] +tests/test_linalg.py::test_norm1[None-3-[7]] +tests/test_linalg.py::test_norm1[None-3-[1, 2]] +tests/test_linalg.py::test_norm1[None-3-[1, 0]] + +tests/test_linalg.py::test_norm2[(0, 1)-None-[[1, 0]]] +tests/test_linalg.py::test_norm2[(0, 1)-None-[[1, 2]]] +tests/test_linalg.py::test_norm2[(0, 1)-None-[[1, 0], [3, 0]]] +tests/test_linalg.py::test_norm2[(0, 1)-None-[[1, 2], [3, 4]]] +tests/test_linalg.py::test_norm2[(0, 1)-"fro"-[[1, 0]]] +tests/test_linalg.py::test_norm2[(0, 1)-"fro"-[[1, 2]]] +tests/test_linalg.py::test_norm2[(0, 1)-"fro"-[[1, 0], [3, 0]]] +tests/test_linalg.py::test_norm2[(0, 1)-"fro"-[[1, 2], [3, 4]]] +tests/test_linalg.py::test_norm2[None-None-[[1, 2]]] +tests/test_linalg.py::test_norm2[None-None-[[1, 0], [3, 0]]] +tests/test_linalg.py::test_norm2[None-None-[[1, 2], [3, 4]]] +tests/test_linalg.py::test_norm2[None-"fro"-[[1, 2]]] +tests/test_linalg.py::test_norm2[None-"fro"-[[1, 0], [3, 0]]] +tests/test_linalg.py::test_norm2[None-"fro"-[[1, 2], [3, 4]]] + +tests/test_linalg.py::test_norm3[0-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[0-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[0--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[0--2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[0--1-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[0--1-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[0-2-[[[1, 2], [3, 
4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[0-2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[1-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[1-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[1--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[1--2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[1--1-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[1--1-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[1-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[1-2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[2-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[2-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[2--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[2--1-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[2-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 1)-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 1)-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(0, 1)--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 1)--2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(0, 1)-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 1)-2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(0, 2)-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 2)-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(0, 2)--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 2)-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(0, 2)-2-[[[1, 0], [3, 0]], [[5, 
0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(1, 2)-None-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(1, 2)-None-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] +tests/test_linalg.py::test_norm3[(1, 2)--2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(1, 2)-2-[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]] +tests/test_linalg.py::test_norm3[(1, 2)-2-[[[1, 0], [3, 0]], [[5, 0], [7, 0]]]] + +tests/test_linalg.py::test_qr[complete-(2,2)-float64] +tests/test_linalg.py::test_qr[complete-(3,4)-float64] +tests/test_linalg.py::test_qr[complete-(3,4)-int64] +tests/test_linalg.py::test_qr[complete-(3,4)-int32] +tests/test_linalg.py::test_qr[complete-(5,3)-float64] +tests/test_linalg.py::test_qr[complete-(5,3)-int64] +tests/test_linalg.py::test_qr[complete-(5,3)-int32] +tests/test_linalg.py::test_qr[complete-(16,16)-float64] +tests/test_linalg.py::test_qr[complete-(16,16)-int64] +tests/test_linalg.py::test_qr[complete-(16,16)-int32] +tests/test_linalg.py::test_qr[reduced-(2,2)-float64] +tests/test_linalg.py::test_qr[reduced-(3,4)-float64] +tests/test_linalg.py::test_qr[reduced-(5,3)-float64] +tests/test_linalg.py::test_qr[reduced-(16,16)-float64] + +tests/test_linalg.py::test_svd[(2,2)-float64] +tests/test_linalg.py::test_svd[(3,4)-float64] +tests/test_linalg.py::test_svd[(5,3)-float64] +tests/test_linalg.py::test_svd[(16,16)-float64] + tests/test_mathematical.py::TestGradient::test_gradient_y1_dx[3.5-array1] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp.asarray([(i, i) for i in x], [("a", int), ("b", int)]).view(dpnp.recarray))] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]] diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index d41fe24c3c7..eed4bafdb32 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -91,47 +91,6 @@ 
tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-remainder-data15-data25 tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-floor_divide-data12-data22] tests/test_sycl_queue.py::test_broadcasting[opencl:cpu:0-remainder-data15-data25] -tests/test_fft.py::test_fft_ndim[None-shape3-float32] -tests/test_fft.py::test_fft_ndim[None-shape3-float64] -tests/test_fft.py::test_fft_ndim[None-shape3-int32] -tests/test_fft.py::test_fft_ndim[None-shape3-int64] -tests/test_fft.py::test_fft_ndim[forward-shape3-float32] -tests/test_fft.py::test_fft_ndim[forward-shape3-float64] -tests/test_fft.py::test_fft_ndim[forward-shape3-int32] -tests/test_fft.py::test_fft_ndim[forward-shape3-int64] -tests/test_fft.py::test_fft_ndim[ortho-shape3-float32] -tests/test_fft.py::test_fft_ndim[ortho-shape3-float64] -tests/test_fft.py::test_fft_ndim[ortho-shape3-int32] -tests/test_fft.py::test_fft_ndim[ortho-shape3-int64] -tests/test_fft.py::test_fft_ifft[None-shape4-float32] -tests/test_fft.py::test_fft_ifft[None-shape4-float64] -tests/test_fft.py::test_fft_ifft[None-shape4-int32] -tests/test_fft.py::test_fft_ifft[None-shape4-int64] -tests/test_fft.py::test_fft_ifft[forward-shape4-float32] -tests/test_fft.py::test_fft_ifft[forward-shape4-float64] -tests/test_fft.py::test_fft_ifft[forward-shape4-int32] -tests/test_fft.py::test_fft_ifft[forward-shape4-int64] -tests/test_fft.py::test_fft_ifft[ortho-shape4-float32] -tests/test_fft.py::test_fft_ifft[ortho-shape4-float64] -tests/test_fft.py::test_fft_ifft[ortho-shape4-int32] -tests/test_fft.py::test_fft_ifft[ortho-shape4-int64] -tests/test_fft.py::test_fft_rfft[shape1-float32] -tests/test_fft.py::test_fft_rfft[shape1-float64] -tests/test_fft.py::test_fft_rfft[shape1-int32] -tests/test_fft.py::test_fft_rfft[shape1-int64] -tests/test_fft.py::test_fft_rfft[shape2-float32] -tests/test_fft.py::test_fft_rfft[shape2-float64] -tests/test_fft.py::test_fft_rfft[shape2-int32] -tests/test_fft.py::test_fft_rfft[shape2-int64] 
-tests/test_fft.py::test_fft_rfft[shape3-float32] -tests/test_fft.py::test_fft_rfft[shape3-float64] -tests/test_fft.py::test_fft_rfft[shape3-int32] -tests/test_fft.py::test_fft_rfft[shape3-int64] -tests/test_fft.py::test_fft_rfft[shape4-float32] -tests/test_fft.py::test_fft_rfft[shape4-float64] -tests/test_fft.py::test_fft_rfft[shape4-int32] -tests/test_fft.py::test_fft_rfft[shape4-int64] - tests/test_indexing.py::test_nonzero[[[1, 0], [1, 0]]] tests/test_indexing.py::test_nonzero[[[1, 2], [3, 4]]] tests/test_indexing.py::test_nonzero[[[0, 1, 2], [3, 0, 5], [6, 7, 0]]] @@ -380,10 +339,6 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{extern tests/third_party/cupy/statistics_tests/test_correlation.py::TestCov::test_cov_empty tests/third_party/cupy/statistics_tests/test_meanvar.py::TestMeanVar::test_external_mean_axis -tests/test_linalg.py::test_eig_arange[16-float64] -tests/test_linalg.py::test_eig_arange[16-float32] -tests/test_linalg.py::test_eig_arange[16-int64] -tests/test_linalg.py::test_eig_arange[16-int32] tests/test_random.py::test_randn_normal_distribution tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_outer tests/third_party/cupy/random_tests/test_sample.py::TestRandintDtype::test_dtype @@ -445,6 +400,7 @@ tests/test_dparray.py::test_astype[[]-complex-int32] tests/test_dparray.py::test_astype[[]-complex-bool] tests/test_dparray.py::test_astype[[]-complex-bool_] tests/test_dparray.py::test_astype[[]-complex-complex] + tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[-2-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] @@ -457,10 +413,20 @@ tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] tests/test_linalg.py::test_cond[None-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] tests/test_linalg.py::test_cond[-numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] 
tests/test_linalg.py::test_cond[numpy.inf-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] -tests/test_linalg.py::test_eig_arange[300-float32] -tests/test_linalg.py::test_eig_arange[300-float64] -tests/test_linalg.py::test_eig_arange[300-int32] -tests/test_linalg.py::test_eig_arange[300-int64] + +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-float64] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-float32] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-int64] +tests/test_linalg.py::test_matrix_rank[None-[0, 1]-int32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-float64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-float32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-int64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [1, 2]]-int32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-float64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-float32] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int64] +tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int32] + tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp.asarray([(i, i) for i in x], [("a", int), ("b", int)]).view(dpnp.recarray))] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)] @@ -739,54 +705,30 @@ tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_ tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_mixed_start_stop tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_mixed_start_stop2 tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_start_stop_list + +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), 
shape=(3, 4)}::test_fft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifft2 +tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_15_{axes=(), norm=None, s=None, shape=(2, 3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_16_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_18_{axes=None, norm=None, s=None, shape=(0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_18_{axes=None, norm=None, s=None, shape=(0, 5)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_19_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_19_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_ifft2 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_20_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_20_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 
4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_9_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_ifft2 -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_ifftn + +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_17_{axes=(), norm='ortho', s=None, shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_17_{axes=(), norm='ortho', s=None, shape=(2, 3, 4)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_18_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_fftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_18_{axes=(0, 1, 2), norm='ortho', s=(2, 3), shape=(2, 3, 4)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_ifftn +tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_10_{axes=None, norm=None, s=(1, 4, None), shape=(2, 3, 4)}::test_fftn 
tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_21_{axes=None, norm=None, s=None, shape=(0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_21_{axes=None, norm=None, s=None, shape=(0, 5)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_22_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_22_{axes=None, norm=None, s=None, shape=(2, 0, 5)}::test_ifftn tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_23_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_23_{axes=None, norm=None, s=None, shape=(0, 0, 5)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_2_{n=None, norm=None, shape=(10,)}::test_fft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_2_{n=None, norm=None, shape=(10,)}::test_ifft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_2_{n=None, norm=None, shape=(10,)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_2_{n=None, norm=None, shape=(10,)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_6_{n=None, norm='ortho', shape=(10,)}::test_fft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_6_{n=None, norm='ortho', shape=(10,)}::test_ifft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_6_{n=None, norm='ortho', shape=(10,)}::test_fftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_6_{n=None, norm='ortho', shape=(10,)}::test_ifftn -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_3_{n=None, norm=None, shape=(10, 10)}::test_fft 
-tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_3_{n=None, norm=None, shape=(10, 10)}::test_ifft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_7_{n=None, norm='ortho', shape=(10, 10)}::test_fft -tests/third_party/cupy/fft_tests/test_fft.py::TestFft_param_7_{n=None, norm='ortho', shape=(10, 10)}::test_ifft -tests/third_party/cupy/fft_tests/test_fft.py::TestRfft_param_0_{n=None, norm=None, shape=(10,)}::test_irfft -tests/third_party/cupy/fft_tests/test_fft.py::TestRfft_param_0_{n=None, norm=None, shape=(10,)}::test_rfft -tests/third_party/cupy/fft_tests/test_fft.py::TestRfft_param_1_{n=None, norm=None, shape=(10, 10)}::test_irfft -tests/third_party/cupy/fft_tests/test_fft.py::TestRfft_param_1_{n=None, norm=None, shape=(10, 10)}::test_rfft tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_AxisConcatenator_init1 tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_len diff --git a/tests/test_fft.py b/tests/test_fft.py index 66019defd1a..f1065cc70fc 100644 --- a/tests/test_fft.py +++ b/tests/test_fft.py @@ -1,63 +1,60 @@ -import pytest - -import dpnp - -import numpy - - -@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) -@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) -def test_fft(type, norm): - # 1 dim array - data = numpy.arange(100, dtype=numpy.dtype(type)) - # TODO: - # doesn't work correct with `complex64` (not supported) - # dpnp_data = dpnp.arange(100, dtype=dpnp.dtype(type)) - dpnp_data = dpnp.array(data) - - np_res = numpy.fft.fft(data, norm=norm) - dpnp_res = dpnp.asnumpy(dpnp.fft.fft(dpnp_data, norm=norm)) - - numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) - assert dpnp_res.dtype == np_res.dtype - - -@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) -@pytest.mark.parametrize("shape", [(8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) 
-@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) -def test_fft_ndim(type, shape, norm): - np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) - dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) - - np_res = numpy.fft.fft(np_data, norm=norm) - dpnp_res = dpnp.fft.fft(dpnp_data, norm=norm) - - numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) - assert dpnp_res.dtype == np_res.dtype - - -@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) -@pytest.mark.parametrize("shape", [(64,), (8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) -@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) -def test_fft_ifft(type, shape, norm): - np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) - dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) - - np_res = numpy.fft.ifft(np_data, norm=norm) - dpnp_res = dpnp.fft.ifft(dpnp_data, norm=norm) - - numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) - assert dpnp_res.dtype == np_res.dtype - - -@pytest.mark.parametrize("type", ['float32', 'float64', 'int32', 'int64']) -@pytest.mark.parametrize("shape", [(64, ), (8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) -def test_fft_rfft(type, shape): - np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) - dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) - - np_res = numpy.fft.rfft(np_data) - dpnp_res = dpnp.fft.rfft(dpnp_data) - - numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) - assert dpnp_res.dtype == np_res.dtype +import pytest + +import dpnp + +import numpy + + +@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) +@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) +def test_fft(type, norm): + # 1 dim array + data = numpy.arange(100, dtype=numpy.dtype(type)) + dpnp_data = dpnp.array(data) + + np_res = numpy.fft.fft(data, 
norm=norm) + dpnp_res = dpnp.asnumpy(dpnp.fft.fft(dpnp_data, norm=norm)) + + numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) + assert dpnp_res.dtype == np_res.dtype + + +@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) +@pytest.mark.parametrize("shape", [(8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) +@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) +def test_fft_ndim(type, shape, norm): + np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) + dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) + + np_res = numpy.fft.fft(np_data, norm=norm) + dpnp_res = dpnp.fft.fft(dpnp_data, norm=norm) + + numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) + assert dpnp_res.dtype == np_res.dtype + + +@pytest.mark.parametrize("type", ['complex128', 'complex64', 'float32', 'float64', 'int32', 'int64']) +@pytest.mark.parametrize("shape", [(64,), (8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) +@pytest.mark.parametrize("norm", [None, 'forward', 'ortho']) +def test_fft_ifft(type, shape, norm): + np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) + dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) + + np_res = numpy.fft.ifft(np_data, norm=norm) + dpnp_res = dpnp.fft.ifft(dpnp_data, norm=norm) + + numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) + assert dpnp_res.dtype == np_res.dtype + + +@pytest.mark.parametrize("type", ['float32', 'float64', 'int32', 'int64']) +@pytest.mark.parametrize("shape", [(64, ), (8, 8), (4, 16), (4, 4, 4), (2, 4, 4, 2)]) +def test_fft_rfft(type, shape): + np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape) + dpnp_data = dpnp.arange(64, dtype=numpy.dtype(type)).reshape(shape) + + np_res = numpy.fft.rfft(np_data) + dpnp_res = dpnp.fft.rfft(dpnp_data) + + numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7) + assert dpnp_res.dtype == np_res.dtype diff 
--git a/tests/test_linalg.py b/tests/test_linalg.py index ecd12040fd5..b9535a7b274 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -2,10 +2,19 @@ import dpnp as inp +import dpctl import numpy def vvsort(val, vec, size, xp): + val_kwargs = dict() + if hasattr(val, 'sycl_queue'): + val_kwargs['sycl_queue'] = getattr(val, "sycl_queue", None) + + vec_kwargs = dict() + if hasattr(vec, 'sycl_queue'): + vec_kwargs['sycl_queue'] = getattr(vec, "sycl_queue", None) + for i in range(size): imax = i for j in range(i + 1, size): @@ -17,16 +26,15 @@ def vvsort(val, vec, size, xp): unravel_i = numpy.unravel_index(i, val.shape) unravel_imax = numpy.unravel_index(imax, val.shape) - temp = xp.empty(tuple(), dtype=vec.dtype) - temp[()] = val[unravel_i] # make a copy + # swap elements in val array + temp = xp.array(val[unravel_i], dtype=vec.dtype, **val_kwargs) val[unravel_i] = val[unravel_imax] val[unravel_imax] = temp - for k in range(size): - temp = xp.empty(tuple(), dtype=val.dtype) - temp[()] = vec[k, i] # make a copy - vec[k, i] = vec[k, imax] - vec[k, imax] = temp + # swap corresponding columns in vec matrix + temp = xp.array(vec[:, i], dtype=val.dtype, **vec_kwargs) + vec[:, i] = vec[:, imax] + vec[:, imax] = temp @pytest.mark.parametrize("array", @@ -83,6 +91,9 @@ def test_det(array): @pytest.mark.parametrize("size", [2, 4, 8, 16, 300]) def test_eig_arange(type, size): + if dpctl.get_current_device_type() != dpctl.device_type.gpu: + pytest.skip("eig function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005") + a = numpy.arange(size * size, dtype=type).reshape((size, size)) symm_orig = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype=type)) symm = symm_orig @@ -115,14 +126,20 @@ def test_eig_arange(type, size): numpy.testing.assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05) -def test_eigvals(): +@pytest.mark.parametrize("type", + [numpy.float64, numpy.float32, numpy.int64, numpy.int32], + 
ids=['float64', 'float32', 'int64', 'int32']) +def test_eigvals(type): + if dpctl.get_current_device_type() != dpctl.device_type.gpu: + pytest.skip("eigvals function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005") + arrays = [ [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]] ] for array in arrays: - a = numpy.array(array) + a = numpy.array(array, dtype=type) ia = inp.array(a) result = inp.linalg.eigvals(ia) expected = numpy.linalg.eigvals(a) @@ -143,23 +160,23 @@ def test_inv(type, array): numpy.testing.assert_allclose(expected, result) -def test_matrix_rank(): - arrays = [ - [0, 0], - # [0, 1], - [1, 2], - [[0, 0], [0, 0]], - # [[1, 2], [1, 2]], - # [[1, 2], [3, 4]], - ] - tols = [None] - for array in arrays: - for tol in tols: - a = numpy.array(array) - ia = inp.array(a) - result = inp.linalg.matrix_rank(ia, tol=tol) - expected = numpy.linalg.matrix_rank(a, tol=tol) - numpy.testing.assert_array_equal(expected, result) +@pytest.mark.parametrize("type", + [numpy.float64, numpy.float32, numpy.int64, numpy.int32], + ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("array", + [[0, 0], [0, 1], [1, 2], [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]]], + ids=['[0, 0]', '[0, 1]', '[1, 2]', '[[0, 0], [0, 0]]', '[[1, 2], [1, 2]]', '[[1, 2], [3, 4]]']) +@pytest.mark.parametrize("tol", + [None], + ids=['None']) +def test_matrix_rank(type, tol, array): + a = numpy.array(array, dtype=type) + ia = inp.array(a) + + result = inp.linalg.matrix_rank(ia, tol=tol) + expected = numpy.linalg.matrix_rank(a, tol=tol) + + numpy.testing.assert_allclose(expected, result) @pytest.mark.parametrize("array", diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index b858db48a1f..42169629030 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -41,6 +41,37 @@ def assert_sycl_queue_equal(result, expected): assert exec_queue is not None +def vvsort(val, vec, size, xp): + val_kwargs = dict() + if hasattr(val, 
'sycl_queue'): + val_kwargs['sycl_queue'] = getattr(val, "sycl_queue", None) + + vec_kwargs = dict() + if hasattr(vec, 'sycl_queue'): + vec_kwargs['sycl_queue'] = getattr(vec, "sycl_queue", None) + + for i in range(size): + imax = i + for j in range(i + 1, size): + unravel_imax = numpy.unravel_index(imax, val.shape) + unravel_j = numpy.unravel_index(j, val.shape) + if xp.abs(val[unravel_imax]) < xp.abs(val[unravel_j]): + imax = j + + unravel_i = numpy.unravel_index(i, val.shape) + unravel_imax = numpy.unravel_index(imax, val.shape) + + # swap elements in val array + temp = xp.array(val[unravel_i], dtype=vec.dtype, **val_kwargs) + val[unravel_i] = val[unravel_imax] + val[unravel_imax] = temp + + # swap corresponding columns in vec matrix + temp = xp.array(vec[:, i], dtype=val.dtype, **vec_kwargs) + vec[:, i] = vec[:, imax] + vec[:, imax] = temp + + @pytest.mark.parametrize( "func,data", [ @@ -104,7 +135,6 @@ def test_1in_1out(func, data, device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) - assert result_queue.sycl_device == expected_queue.sycl_device @pytest.mark.parametrize( @@ -169,7 +199,6 @@ def test_2in_1out(func, data1, data2, device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) - assert result_queue.sycl_device == expected_queue.sycl_device @pytest.mark.parametrize( @@ -216,7 +245,6 @@ def test_broadcasting(func, data1, data2, device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) - assert result_queue.sycl_device == expected_queue.sycl_device @pytest.mark.parametrize( @@ -277,7 +305,6 @@ def test_out(func, data1, data2, device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) - assert result_queue.sycl_device == expected_queue.sycl_device @pytest.mark.parametrize("device", @@ -302,8 +329,255 @@ def test_modf(device): 
assert_sycl_queue_equal(result1_queue, expected_queue) assert_sycl_queue_equal(result2_queue, expected_queue) - assert result1_queue.sycl_device == expected_queue.sycl_device - assert result2_queue.sycl_device == expected_queue.sycl_device
+
+@pytest.mark.parametrize("type", ['complex128'])
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_fft(type, device):
+    data = numpy.arange(100, dtype=numpy.dtype(type))
+
+    dpnp_data = dpnp.array(data, device=device)
+
+    expected = numpy.fft.fft(data)
+    result = dpnp.fft.fft(dpnp_data)
+
+    numpy.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-7)
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    result_queue = result.get_array().sycl_queue
+
+    assert_sycl_queue_equal(result_queue, expected_queue)
+
+
+@pytest.mark.parametrize("type", ['float32'])
+@pytest.mark.parametrize("shape", [(8,8)])
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_fft_rfft(type, shape, device):
+    np_data = numpy.arange(64, dtype=numpy.dtype(type)).reshape(shape)
+    dpnp_data = dpnp.array(np_data, device=device)
+
+    np_res = numpy.fft.rfft(np_data)
+    dpnp_res = dpnp.fft.rfft(dpnp_data)
+
+    numpy.testing.assert_allclose(dpnp_res, np_res, rtol=1e-4, atol=1e-7)
+    assert dpnp_res.dtype == np_res.dtype
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    result_queue = dpnp_res.get_array().sycl_queue
+
+    assert_sycl_queue_equal(result_queue, expected_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_cholesky(device):
+    data = [[[1., -2.], [2., 5.]], [[1., -2.], [2., 5.]]]
+    numpy_data = numpy.array(data)
+    dpnp_data = dpnp.array(data, device=device)
+
+    result = dpnp.linalg.cholesky(dpnp_data)
+    expected = numpy.linalg.cholesky(numpy_data)
+    numpy.testing.assert_array_equal(expected, result)
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    result_queue = result.get_array().sycl_queue
+
+    assert_sycl_queue_equal(result_queue, expected_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_det(device):
+    data = [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]
+    numpy_data = numpy.array(data)
+    dpnp_data = dpnp.array(data, device=device)
+
+    result = dpnp.linalg.det(dpnp_data)
+    expected = numpy.linalg.det(numpy_data)
+    numpy.testing.assert_allclose(expected, result)
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    result_queue = result.get_array().sycl_queue
+
+    assert_sycl_queue_equal(result_queue, expected_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_eig(device):
+    if device.device_type != dpctl.device_type.gpu:
+        pytest.skip("eig function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005")
+
+    size = 4
+    a = numpy.arange(size * size, dtype='float64').reshape((size, size))
+    symm_orig = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype='float64'))
+    numpy_data = symm_orig
+    dpnp_symm_orig = dpnp.array(numpy_data, device=device)
+    dpnp_data = dpnp_symm_orig
+
+    dpnp_val, dpnp_vec = dpnp.linalg.eig(dpnp_data)
+    numpy_val, numpy_vec = numpy.linalg.eig(numpy_data)
+
+    # DPNP sort val/vec by abs value
+    vvsort(dpnp_val, dpnp_vec, size, dpnp)
+
+    # NP sort val/vec by abs value
+    vvsort(numpy_val, numpy_vec, size, numpy)
+
+    # flip the sign of a NumPy vector when its first component disagrees in sign with dpnp's
+    for i in range(numpy_vec.shape[1]):
+        if numpy_vec[0, i] * dpnp_vec[0, i] < 0:
+            numpy_vec[:, i] = -numpy_vec[:, i]
+
+    numpy.testing.assert_allclose(dpnp_val, numpy_val, rtol=1e-05, atol=1e-05)
+    numpy.testing.assert_allclose(dpnp_vec, numpy_vec, rtol=1e-05, atol=1e-05)
+
+    assert (dpnp_val.dtype == numpy_val.dtype)
+    assert (dpnp_vec.dtype == numpy_vec.dtype)
+    assert (dpnp_val.shape == numpy_val.shape)
+    assert (dpnp_vec.shape == numpy_vec.shape)
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    dpnp_val_queue = dpnp_val.get_array().sycl_queue
+    dpnp_vec_queue = dpnp_vec.get_array().sycl_queue
+
+    # compare queue and device
+    assert_sycl_queue_equal(dpnp_val_queue, expected_queue)
+    assert_sycl_queue_equal(dpnp_vec_queue, expected_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_eigvals(device):
+    if device.device_type != dpctl.device_type.gpu:
+        pytest.skip("eigvals function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005")
+
+    data = [[0, 0], [0, 0]]
+    numpy_data = numpy.array(data)
+    dpnp_data = dpnp.array(data, device=device)
+
+    result = dpnp.linalg.eigvals(dpnp_data)
+    expected = numpy.linalg.eigvals(numpy_data)
+    numpy.testing.assert_allclose(expected, result, atol=0.5)
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    result_queue = result.get_array().sycl_queue
+
+    assert_sycl_queue_equal(result_queue, expected_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_inv(device):
+    data = [[1., 2.], [3., 4.]]
+    numpy_data = numpy.array(data)
+    dpnp_data = dpnp.array(data, device=device)
+
+    result = dpnp.linalg.inv(dpnp_data)
+    expected = numpy.linalg.inv(numpy_data)
+    numpy.testing.assert_allclose(expected, result)
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    result_queue = result.get_array().sycl_queue
+
+    assert_sycl_queue_equal(result_queue, expected_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_matrix_rank(device):
+    data = [[0, 0], [0, 0]]
+    numpy_data = numpy.array(data)
+    dpnp_data = dpnp.array(data, device=device)
+
+    result = dpnp.linalg.matrix_rank(dpnp_data)
+    expected = numpy.linalg.matrix_rank(numpy_data)
+    numpy.testing.assert_array_equal(expected, result)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_qr(device):
+    tol = 1e-11
+    data = [[1,2,3], [1,2,3]]
+    numpy_data = numpy.array(data)
+    dpnp_data = dpnp.array(data, device=device)
+
+    np_q, np_r = numpy.linalg.qr(numpy_data, "reduced")
+    dpnp_q, dpnp_r = dpnp.linalg.qr(dpnp_data, "reduced")
+
+    assert (dpnp_q.dtype == np_q.dtype)
+    assert (dpnp_r.dtype == np_r.dtype)
+    assert (dpnp_q.shape == np_q.shape)
+    assert (dpnp_r.shape == np_r.shape)
+
+    numpy.testing.assert_allclose(dpnp_q, np_q, rtol=tol, atol=tol)
+    numpy.testing.assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol)
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    dpnp_q_queue = dpnp_q.get_array().sycl_queue
+    dpnp_r_queue = dpnp_r.get_array().sycl_queue
+
+    # compare queue and device
+    assert_sycl_queue_equal(dpnp_q_queue, expected_queue)
+    assert_sycl_queue_equal(dpnp_r_queue, expected_queue)
+
+
+@pytest.mark.parametrize("device",
+                         valid_devices,
+                         ids=[device.filter_string for device in valid_devices])
+def test_svd(device):
+    tol = 1e-12
+    shape = (2,2)
+    numpy_data = numpy.arange(shape[0] * shape[1]).reshape(shape)
+    dpnp_data = dpnp.array(numpy_data, device=device)
+    np_u, np_s, np_vt = numpy.linalg.svd(numpy_data)
+    dpnp_u, dpnp_s, dpnp_vt = dpnp.linalg.svd(dpnp_data)
+
+    assert (dpnp_u.dtype == np_u.dtype)
+    assert (dpnp_s.dtype == np_s.dtype)
+    assert (dpnp_vt.dtype == np_vt.dtype)
+    assert (dpnp_u.shape == np_u.shape)
+    assert (dpnp_s.shape == np_s.shape)
+    assert (dpnp_vt.shape == np_vt.shape)
+
+    # build the diagonal matrix of singular values on the device under test
+    dpnp_diag_s = dpnp.array(numpy.zeros(shape, dtype=dpnp_s.dtype), device=device)
+    for i in range(dpnp_s.size):
+        dpnp_diag_s[i, i] = dpnp_s[i]
+
+    # check decomposition: u . diag(s) . vt must reproduce the input matrix
+    numpy.testing.assert_allclose(dpnp_data, dpnp.dot(dpnp_u, dpnp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol)
+
+    for i in range(min(shape[0], shape[1])):
+        if np_u[0, i] * dpnp_u[0, i] < 0:
+            np_u[:, i] = -np_u[:, i]
+            np_vt[i, :] = -np_vt[i, :]
+
+    # compare vectors for non-zero values
+    for i in range(numpy.count_nonzero(np_s > tol)):
+        numpy.testing.assert_allclose(dpnp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol)
+        numpy.testing.assert_allclose(dpnp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol)
+
+    expected_queue = dpnp_data.get_array().sycl_queue
+    dpnp_u_queue = dpnp_u.get_array().sycl_queue
+    dpnp_s_queue = dpnp_s.get_array().sycl_queue
+    dpnp_vt_queue = dpnp_vt.get_array().sycl_queue
+
+    # compare queue and device
+    assert_sycl_queue_equal(dpnp_u_queue, expected_queue)
+    assert_sycl_queue_equal(dpnp_s_queue, expected_queue)
+    assert_sycl_queue_equal(dpnp_vt_queue, expected_queue)
 @pytest.mark.parametrize("device_from",