diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 804a2b7542..2bd27c3681 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -71,3 +71,10 @@ KOKKOSKERNELS_GENERATE_ETI(Lapack_svd svd SOURCE_LIST SOURCES TYPE_LISTS FLOATS LAYOUTS DEVICES ) + +KOKKOSKERNELS_GENERATE_ETI(Lapack_geqrf geqrf + COMPONENTS lapack + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) diff --git a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..4f4ad91cb6 --- /dev/null +++ b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in @@ -0,0 +1,25 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosLapack_geqrf_spec.hpp" + +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEQRF_ETI_INST_BLOCK@ + } // namespace Impl +} // namespace KokkosLapack diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..899a8b7604 --- /dev/null +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in @@ -0,0 +1,24 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEQRF_ETI_AVAIL_BLOCK@ + } // namespace Impl +} // namespace KokkosLapack +#endif diff --git a/lapack/impl/KokkosLapack_geqrf_impl.hpp b/lapack/impl/KokkosLapack_geqrf_impl.hpp new file mode 100644 index 0000000000..ea20018073 --- /dev/null +++ b/lapack/impl/KokkosLapack_geqrf_impl.hpp @@ -0,0 +1,34 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_IMPL_GEQRF_HPP_ +#define KOKKOSLAPACK_IMPL_GEQRF_HPP_ + +/// \file KokkosLapack_geqrf_impl.hpp +/// \brief Implementation(s) of dense linear solve. + +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// NOTE: Might add the implementation of KokkosLapack::geqrf later + +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_IMPL_GEQRF_HPP diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp new file mode 100644 index 0000000000..89a253b796 --- /dev/null +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -0,0 +1,139 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ +#define KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ + +#include +#include +#include + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#endif + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct geqrf_eti_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization availability +// KokkosLapack::Impl::GEQRF. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _INST macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct geqrf_eti_spec_avail< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// Unification layer +template < + class ExecutionSpace, class AMatrix, class TWArray, class RType, + bool tpl_spec_avail = + geqrf_tpl_spec_avail::value, + bool eti_spec_avail = + geqrf_eti_spec_avail::value> +struct GEQRF { + static void geqrf(const ExecutionSpace &space, const AMatrix &A, + const TWArray &Tau, const TWArray &Work, const RType &R); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +// Unification layer +template +struct GEQRF { + static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */, + const TWArray & /* Tau */, const TWArray & /* Work */, + const RType & /* R */) { + // NOTE: Might add the implementation of KokkosLapack::geqrf later + throw std::runtime_error( + "No fallback implementation of GEQRF (general QR factorization) " + "exists. Enable LAPACK, CUSOLVER, ROCSOLVER or MAGMA TPL."); + } +}; + +#endif +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization of +// KokkosLapack::Impl::GEQRF. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _DEF macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct GEQRF< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct GEQRF< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#include + +#endif // KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp new file mode 100644 index 0000000000..c680120ae3 --- /dev/null +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -0,0 +1,171 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/// \file KokkosLapack_geqrf.hpp +/// \brief QR factorization +/// +/// This file provides KokkosLapack::geqrf. This function performs a +/// local (no MPI) QR factorization of a M-by-N matrix A. + +#ifndef KOKKOSLAPACK_GEQRF_HPP_ +#define KOKKOSLAPACK_GEQRF_HPP_ + +#include + +#include "KokkosLapack_geqrf_spec.hpp" +#include "KokkosKernels_Error.hpp" + +namespace KokkosLapack { + +/// \brief Computes a QR factorization of a matrix A +/// +/// \tparam ExecutionSpace The space where the kernel will run. +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. +/// +/// \param space [in] Execution space instance used to specified how to execute +/// the geqrf kernels. +/// \param A [in,out] On entry, the M-by-N matrix to be factorized. +/// On exit, the elements on and above the diagonal contain +/// the min(M,N)-by-N upper trapezoidal matrix R (R is upper +/// triangular if M >= N); the elements below the diagonal, +/// with the array Tau, represent the unitary matrix Q as a +/// product of min(M,N) elementary reflectors. The matrix Q +/// is represented as a product of elementary reflectors +/// Q = H(1) H(2) . . . H(k), where k = min(M,N). +/// Each H(i) has the form +/// H(i) = I - Tau(i) * v * v**H, +/// where v is a vector with v(1:i-1) = 0 and v(i) = 1; +/// v(i+1:M) is stored on exit in A(i+1:M,i). +/// \param Tau [out] One-dimensional array of size min(M,N) that contains the +/// scalar factors of the elementary reflectors. +/// \param Info [out] One-dimensional array of integers and of size 1: +/// Info[0] = 0: successfull exit +/// Info[0] < 0: if equal to '-i', the i-th argument had an +/// illegal value +/// +template +void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau, + const InfoArray& Info) { + // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and + // rocSOLVER TPLs. + // MAGMA/rocSOLVER TPL should be enabled to call the MAGMA/rocSOLVER GPU + // interface for device views LAPACK TPL should be enabled to call the + // LAPACK interface for host views + + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); + + static_assert(Kokkos::is_view::value, + "KokkosLapack::geqrf: A must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::geqrf: Tau must be Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::geqrf: Info must be Kokkos::View."); + + static_assert(static_cast(AMatrix::rank) == 2, + "KokkosLapack::geqrf: A must have rank 2."); + static_assert(static_cast(TauArray::rank) == 1, + "KokkosLapack::geqrf: Tau must have rank 1."); + static_assert(static_cast(InfoArray::rank) == 1, + "KokkosLapack::geqrf: Info must have rank 1."); + + static_assert(std::is_same_v, + "KokkosLapack::geqrf: Info must be an array of integers."); + + int64_t m = A.extent(0); + int64_t n = A.extent(1); + int64_t tau0 = Tau.extent(0); + int64_t info0 = Info.extent(0); + + // Check validity of dimensions + if (tau0 != std::min(m, n)) { + std::ostringstream os; + os << "KokkosLapack::geqrf: length of Tau must be equal to min(m,n): " + << " A: " << m << " x " << n << ", Tau length = " << tau0; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + if (info0 == 0) { + std::ostringstream os; + os << "KokkosLapack::geqrf: length of Info must be at least 1: " + << " A: " << m << " x " << n << ", Info length = " << info0; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + using AMatrix_Internal = Kokkos::View< + typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, + typename AMatrix::device_type, Kokkos::MemoryTraits>; + using TauArray_Internal = Kokkos::View< + typename TauArray::non_const_value_type*, typename TauArray::array_layout, + typename TauArray::device_type, Kokkos::MemoryTraits>; + using InfoArray_Internal = + Kokkos::View>; + + AMatrix_Internal A_i = A; + TauArray_Internal Tau_i = Tau; + InfoArray_Internal Info_i = Info; + + KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, + Info_i); +} + +/// \brief Computes a QR factorization of a matrix A +/// +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. +/// +/// \param A [in,out] On entry, the M-by-N matrix to be factorized. +/// On exit, the elements on and above the diagonal contain +/// the min(M,N)-by-N upper trapezoidal matrix R (R is upper +/// triangular if M >= N); the elements below the diagonal, +/// with the array Tau, represent the unitary matrix Q as a +/// product of min(M,N) elementary reflectors. The matrix Q +/// is represented as a product of elementary reflectors +/// Q = H(1) H(2) . . . H(k), where k = min(M,N). +/// Each H(i) has the form +/// H(i) = I - Tau(i) * v * v**H, +/// where v is a vector with v(1:i-1) = 0 and v(i) = 1; +/// v(i+1:M) is stored on exit in A(i+1:M,i). +/// \param Tau [out] One-dimensional array of size min(M,N) that contains the +/// scalar factors of the elementary reflectors. +/// \param Info [out] One-dimensional array of integers and of size 1: +/// Info[0] = 0: successfull exit +/// Info[0] < 0: if equal to '-i', the i-th argument had an +/// illegal value +/// +template +void geqrf(const AMatrix& A, const TauArray& Tau, const InfoArray& Info) { + typename AMatrix::execution_space space{}; + geqrf(space, A, Tau, Info); +} + +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_GEQRF_HPP_ diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index add0a802bd..f72d781e5b 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -82,6 +82,21 @@ void F77_BLAS_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); + +/// +/// Geqrf +/// + +void F77_BLAS_MANGLE(sgeqrf, SGEQRF)(int*, int*, float*, int*, float*, float*, + int*, int*); +void F77_BLAS_MANGLE(dgeqrf, DGEQRF)(int*, int*, double*, int*, double*, + double*, int*, int*); +void F77_BLAS_MANGLE(cgeqrf, CGEQRF)(int*, int*, std::complex*, int*, + std::complex*, std::complex*, + int*, int*); +void F77_BLAS_MANGLE(zgeqrf, ZGEQRF)(int*, int*, std::complex*, int*, + std::complex*, + std::complex*, int*, int*); } #define F77_FUNC_SGESV F77_BLAS_MANGLE(sgesv, SGESV) @@ -99,6 +114,11 @@ void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, #define F77_FUNC_CTRTRI F77_BLAS_MANGLE(ctrtri, CTRTRI) #define F77_FUNC_ZTRTRI F77_BLAS_MANGLE(ztrtri, ZTRTRI) +#define F77_FUNC_SGEQRF F77_BLAS_MANGLE(sgeqrf, SGEQRF) +#define F77_FUNC_DGEQRF F77_BLAS_MANGLE(dgeqrf, DGEQRF) +#define F77_FUNC_CGEQRF F77_BLAS_MANGLE(cgeqrf, CGEQRF) +#define F77_FUNC_ZGEQRF F77_BLAS_MANGLE(zgeqrf, ZGEQRF) + namespace KokkosLapack { namespace Impl { @@ -127,6 +147,11 @@ int HostLapack::trtri(const char uplo, const char diag, int n, F77_FUNC_STRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +void HostLapack::geqrf(int m, int n, float* a, int lda, float* tau, + float* work, int lwork, int* info) { + F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); +} /// /// double @@ -153,20 +178,25 @@ int HostLapack::trtri(const char uplo, const char diag, int n, F77_FUNC_DTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +void HostLapack::geqrf(int m, int n, double* a, int lda, double* tau, + double* work, int lwork, int* info) { + F77_FUNC_DGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); +} /// /// std::complex /// template <> -void HostLapack >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { +void HostLapack>::gesv(int n, int rhs, + std::complex* a, int lda, + int* ipiv, std::complex* b, + int ldb, int info) { F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack >::gesvd( +void HostLapack>::gesvd( const char jobu, const char jobvt, const int m, const int n, std::complex* a, const int lda, float* s, std::complex* u, const int ldu, std::complex* vt, const int ldvt, @@ -175,27 +205,35 @@ void HostLapack >::gesvd( &lwork, rwork, &info); } template <> -int HostLapack >::trtri(const char uplo, const char diag, - int n, const std::complex* a, - int lda) { +int HostLapack>::trtri(const char uplo, const char diag, + int n, const std::complex* a, + int lda) { int info = 0; F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +void HostLapack>::geqrf(int m, int n, + std::complex* a, int lda, + std::complex* tau, + std::complex* work, + int lwork, int* info) { + F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); +} /// /// std::complex /// template <> -void HostLapack >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { +void HostLapack>::gesv(int n, int rhs, + std::complex* a, int lda, + int* ipiv, std::complex* b, + int ldb, int info) { F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack >::gesvd( +void HostLapack>::gesvd( const char jobu, const char jobvt, const int m, const int n, std::complex* a, const int lda, double* s, std::complex* u, const int ldu, std::complex* vt, const int ldvt, @@ -204,14 +242,22 @@ void HostLapack >::gesvd( &lwork, rwork, &info); } template <> -int HostLapack >::trtri(const char uplo, const char diag, - int n, - const std::complex* a, - int lda) { +int HostLapack>::trtri(const char uplo, const char diag, + int n, + const std::complex* a, + int lda) { int info = 0; F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +void HostLapack>::geqrf(int m, int n, + std::complex* a, int lda, + std::complex* tau, + std::complex* work, + int lwork, int* info) { + F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); +} } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp index 9eca83afea..23f6dbc3d6 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -41,6 +41,9 @@ struct HostLapack { static int trtri(const char uplo, const char diag, int n, const T *a, int lda); + + static void geqrf(int m, int n, T *a, int lda, T *tau, T *work, int lwork, + int *info); }; } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp new file mode 100644 index 0000000000..cc6f1e78a4 --- /dev/null +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -0,0 +1,165 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct geqrf_tpl_spec_avail { + enum : bool { value = false }; +}; + +// Generic Host side LAPACK (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK + +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct geqrf_tpl_spec_avail< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +} // namespace Impl +} // namespace KokkosLapack + +// MAGMA +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include "magma_v2.h" + +namespace KokkosLapack { +namespace Impl { +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct geqrf_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +namespace KokkosLapack { +namespace Impl { + +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct geqrf_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // CUSOLVER + +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include + +namespace KokkosLapack { +namespace Impl { + +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct geqrf_tpl_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::HIPSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp new file mode 100644 index 0000000000..c7630cc783 --- /dev/null +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -0,0 +1,511 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ + +namespace KokkosLapack { +namespace Impl { +template +inline void geqrf_print_specialization() { +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA + printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n", + typeid(AViewType).name(), typeid(TauViewType).name(), + typeid(InfoViewType).name()); +#else +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK + printf( + "KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n", + typeid(AViewType).name(), typeid(TauViewType).name(), + typeid(InfoViewType).name()); +#endif +#endif +#endif +} +} // namespace Impl +} // namespace KokkosLapack + +// Generic Host side LAPACK (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK +#include + +namespace KokkosLapack { +namespace Impl { + +template +void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, + const InfoViewType& Info) { + using memory_space = typename AViewType::memory_space; + using Scalar = typename AViewType::non_const_value_type; + using ALayout_t = typename AViewType::array_layout; + static_assert(std::is_same_v, + "KokkosLapack - geqrf: A needs to have a Kokkos::LayoutLeft"); + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = A.stride(1); + + int lwork = -1; + Kokkos::View work("geqrf work buffer", 1); + + if constexpr (Kokkos::ArithTraits::is_complex) { + using MagType = typename Kokkos::ArithTraits::mag_type; + + HostLapack>::geqrf( + m, n, reinterpret_cast*>(A.data()), lda, + reinterpret_cast*>(Tau.data()), + reinterpret_cast*>(work.data()), lwork, + Info.data()); + + if (Info[0] < 0) return; + + lwork = static_cast(work(0).real()); + + work = Kokkos::View("geqrf work buffer", lwork); + + HostLapack>::geqrf( + m, n, reinterpret_cast*>(A.data()), lda, + reinterpret_cast*>(Tau.data()), + reinterpret_cast*>(work.data()), lwork, + Info.data()); + } else { + HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), work.data(), + lwork, Info.data()); + + if (Info[0] < 0) return; + + lwork = static_cast(work(0)); + + work = Kokkos::View("geqrf work buffer", lwork); + + HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), work.data(), + lwork, Info.data()); + } +} + +#define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TauViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const EXECSPACE& /* space */, const AViewType& A, \ + const TauViewType& Tau, const InfoViewType& Info) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR \ + "]"); \ + geqrf_print_specialization(); \ + lapackGeqrfWrapper(A, Tau, Info); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads, Kokkos::HostSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK + +#if 0 // TO DO + +// MAGMA +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include + +namespace KokkosLapack { +namespace Impl { + +template +void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, + const TauViewType& Tau, const InfoViewType& Info) { + using scalar_type = typename AViewType::non_const_value_type; + + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," + + Kokkos::ArithTraits::name() + "]"); + geqrf_print_specialization(); + + magma_int_t N = static_cast(A.extent(1)); + magma_int_t AST = static_cast(A.stride(1)); + magma_int_t LDA = (AST == 0) ? 1 : AST; + magma_int_t BST = static_cast(B.stride(1)); + magma_int_t LDB = (BST == 0) ? 1 : BST; + magma_int_t NRHS = static_cast(B.extent(1)); + + KokkosLapack::Impl::MagmaSingleton& s = + KokkosLapack::Impl::MagmaSingleton::singleton(); + magma_int_t info = 0; + + space.fence(); + if constexpr (std::is_same_v) { + magma_sgeqrf_nopiv_gpu(N, NRHS, reinterpret_cast(A.data()), + LDA, reinterpret_cast(B.data()), + LDB, &info); + } + + if constexpr (std::is_same_v) { + magma_dgeqrf_nopiv_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); + } + + if constexpr (std::is_same_v>) { + magma_cgeqrf_nopiv_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); + } + + if constexpr (std::is_same_v>) { + magma_zgeqrf_nopiv_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); + } + ExecSpace().fence(); + Kokkos::Profiling::popRegion(); +} + +#define KOKKOSLAPACK_GEQRF_MAGMA(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TauViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ + const TauViewType& Tau, const InfoViewType& Info) { \ + magmaGeqrfWrapper(space, A, Tau, Info); \ + } \ + }; + +KOKKOSLAPACK_GEQRF_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA + +#endif // TO DO + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include "KokkosLapack_cusolver.hpp" + +namespace KokkosLapack { +namespace Impl { + +template +void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, + const TauViewType& Tau, const InfoViewType& Info) { + using memory_space = typename AViewType::memory_space; + using Scalar = typename AViewType::non_const_value_type; + + using ALayout_t = typename AViewType::array_layout; + static_assert( + std::is_same_v, + "KokkosLapack - cusolver geqrf: A needs to have a Kokkos::LayoutLeft"); + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = A.stride(1); + int lwork = 0; + + CudaLapackSingleton& s = CudaLapackSingleton::singleton(); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSetStream(s.handle, space.cuda_stream())); + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("cusolver sgeqrf workspace", + lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), + Workspace.data(), lwork, Info.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("cusolver dgeqrf workspace", + lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), + Workspace.data(), lwork, Info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize( + s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); + Kokkos::View Workspace( + "cusolver cgeqrf workspace", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf( + s.handle, m, n, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()), + reinterpret_cast(Workspace.data()), lwork, Info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize( + s.handle, m, n, reinterpret_cast(A.data()), lda, + &lwork)); + Kokkos::View Workspace( + "cusolver zgeqrf workspace", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf( + s.handle, m, n, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()), + reinterpret_cast(Workspace.data()), lwork, + Info.data())); + } + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TauViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ + const TauViewType& Tau, const InfoViewType& Info) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR "]"); \ + geqrf_print_specialization(); \ + \ + cusolverGeqrfWrapper(space, A, Tau, Info); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + +// ROCSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include +#include + +namespace KokkosLapack { +namespace Impl { + +template +void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, + const TauViewType& Tau, const InfoViewType& Info) { + using Scalar = typename AViewType::non_const_value_type; + + using ALayout_t = typename AViewType::array_layout; + static_assert( + std::is_same_v, + "KokkosLapack - rocsolver geqrf: A needs to have a Kokkos::LayoutLeft"); + const rocblas_int m = static_cast(A.extent(0)); + const rocblas_int n = static_cast(A.extent(1)); + const rocblas_int lda = static_cast(A.stride(1)); + + KokkosBlas::Impl::RocBlasSingleton& s = + KokkosBlas::Impl::RocBlasSingleton::singleton(); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocblas_set_stream(s.handle, space.hip_stream())); + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocsolver_sgeqrf(s.handle, m, n, A.data(), lda, Tau.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocsolver_dgeqrf(s.handle, m, n, A.data(), lda, Tau.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf( + s.handle, m, n, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()))); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf( + s.handle, m, n, reinterpret_cast(A.data()), + lda, reinterpret_cast(Tau.data()))); + } + Kokkos::deep_copy(Info, 0); // Success + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_GEQRF_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TauViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::HIP& space, const AViewType& A, \ + const TauViewType& Tau, const InfoViewType& Info) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ + geqrf_print_specialization(); \ + \ + rocsolverGeqrfWrapper(space, A, Tau, Info); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_GEQRF_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif diff --git a/lapack/unit_test/Test_Lapack.hpp b/lapack/unit_test/Test_Lapack.hpp index 1a717521f8..2bcecaceae 100644 --- a/lapack/unit_test/Test_Lapack.hpp +++ b/lapack/unit_test/Test_Lapack.hpp @@ -19,5 +19,6 @@ #include "Test_Lapack_gesv.hpp" #include "Test_Lapack_trtri.hpp" #include "Test_Lapack_svd.hpp" +#include "Test_Lapack_geqrf.hpp" #endif // TEST_LAPACK_HPP diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp new file mode 100644 index 0000000000..0ec9d2679a --- /dev/null +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -0,0 +1,642 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +// Only enable this test where KokkosLapack supports geqrf: +// CUDA+CUSOLVER, HIP+ROCSOLVER and HOST+LAPACK +#if (defined(TEST_CUDA_LAPACK_CPP) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ + (defined(TEST_HIP_LAPACK_CPP) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ + defined(TEST_THREADS_LAPACK_CPP))) + +#include +#include +#include + +#include +#include +#include +#include + +namespace Test { + +template +void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& h_A, + typename ViewTypeTau::HostMirror const& h_tau, + typename ViewTypeA::HostMirror& h_Q, + typename ViewTypeA::HostMirror& h_R, + typename ViewTypeA::HostMirror& h_QR) { + using ScalarA = typename ViewTypeA::value_type; + + // ******************************************************************** + // Populate h_R + // ******************************************************************** + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + if (i <= j) { + h_R(i, j) = h_A(i, j); + } else { + h_R(i, j) = Kokkos::ArithTraits::zero(); + } + } + } + + // ******************************************************************** + // Instantiate the m x m identity matrix h_I + // ******************************************************************** + ViewTypeA I("I", m, m); + typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); + Kokkos::deep_copy(h_I, Kokkos::ArithTraits::zero()); + for (int i(0); i < m; ++i) { + if constexpr (Kokkos::ArithTraits::is_complex) { + h_I(i, i).real() = 1.; + } else { + h_I(i, i) = 1.; + } + } + + // ******************************************************************** + // Compute h_Q + // ******************************************************************** + int minMN(std::min(m, n)); + ViewTypeTau v("v", m); + typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v); + + ViewTypeA Qk("Qk", m, m); + typename ViewTypeA::HostMirror h_Qk = Kokkos::create_mirror_view(Qk); + + ViewTypeA auxM("auxM", m, m); + typename ViewTypeA::HostMirror h_auxM = Kokkos::create_mirror_view(auxM); + + // Q = H(0) H(1) . . . H(min(M,N)-1), where for k=0,1,...,min(m,n)-1: + // H(k) = I - Tau(k) * v * v**H, and + // v is a vector of size m with: + // v(0:k-1) = 0, + // v(k) = 1, + // v(k+1:m-1) = A(k+1:m-1,k). + for (int k(0); k < minMN; ++k) { + Kokkos::deep_copy(h_v, Kokkos::ArithTraits::zero()); + h_v[k] = 1.; + for (int index(k + 1); index < m; ++index) { + h_v[index] = h_A(index, k); + } +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + std::cout << "k = " << k << ", h_v[" << i << "] = " << std::setprecision(16) << h_v[i] << std::endl; + } +#endif + + // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}. + // void ger( const char trans[] + // , const typename AViewType::const_value_type & alpha + // , const XViewType & x + // , const YViewType & y + // , const AViewType & A + // ); + Kokkos::deep_copy(h_Qk, h_I); + KokkosBlas::ger("H", -h_tau[k], h_v, h_v, h_Qk); + +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + for (int j(0); j < m; ++j) { + std::cout << "k = " << k << ", hQk(" << i << "," << j << ") = " << h_Qk(i,j) << std::endl; + } + } +#endif + + // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). + // void gemm( const char transA[] + // , const char transB[] + // , typename AViewType::const_value_type & alpha + // , const AViewType & A + // , const BViewType & B + // , typename CViewType::const_value_type & beta + // , const CViewType & C + // ); + if (k == 0) { + Kokkos::deep_copy(h_Q, h_Qk); + } else { + Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits::zero()); + KokkosBlas::gemm("N", "N", 1., h_Q, h_Qk, 0., h_auxM); + Kokkos::deep_copy(h_Q, h_auxM); + } + +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + for (int j(0); j < m; ++j) { + std::cout << "k = " << k << ", hQ(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; + } + } +#endif + } // for k + + // ******************************************************************** + // Check that Q^H Q = I + // ******************************************************************** + { + Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits::zero()); + KokkosBlas::gemm("C", "N", 1., h_Q, h_Q, 0., h_auxM); + + typename Kokkos::ArithTraits< + typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8); + if constexpr (std::is_same_v< + typename Kokkos::ArithTraits< + typename ViewTypeA::non_const_value_type>::mag_type, + float>) { + absTol = 5.e-5; + } + + using ats = Kokkos::ArithTraits; + bool test_flag_QHQ = true; + for (int i(0); (i < m) && test_flag_QHQ; ++i) { + for (int j(0); (j < m) && test_flag_QHQ; ++j) { + if (ats::abs(h_auxM(i, j) - h_I(i, j)) > absTol) { + std::cout << "QHQ checking" + << ", m = " << m << ", n = " << n << ", i = " << i + << ", j = " << j + << ", h_auxM(i,j) = " << std::setprecision(16) + << h_auxM(i, j) << ", h_I(i,j) = " << std::setprecision(16) + << h_I(i, j) << ", |diff| = " << std::setprecision(16) + << ats::abs(h_auxM(i, j) - h_I(i, j)) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_QHQ = false; + } + } + } + ASSERT_EQ(test_flag_QHQ, true); + } + + // ******************************************************************** + // Compute h_QR + // ******************************************************************** + Kokkos::deep_copy(h_QR, Kokkos::ArithTraits::zero()); + KokkosBlas::gemm("N", "N", 1., h_Q, h_R, 0., h_QR); +} + +template +void impl_test_geqrf(int m, int n) { + using ALayout_t = typename ViewTypeA::array_layout; + using ViewTypeInfo = Kokkos::View; + using execution_space = typename Device::execution_space; + using ScalarA = typename ViewTypeA::value_type; + using ats = Kokkos::ArithTraits; + + Kokkos::Random_XorShift64_Pool rand_pool(13718); + + int minMN(std::min(m, n)); + + // ******************************************************************** + // Create device views + // ******************************************************************** + ViewTypeA A("A", m, n); + ViewTypeA Aorig("Aorig", m, n); + ViewTypeTau Tau("Tau", minMN); + ViewTypeInfo Info("Info", 1); + + // ******************************************************************** + // Create host mirrors of device views + // ******************************************************************** + typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(Aorig); + typename ViewTypeTau::HostMirror h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info); + + // ******************************************************************** + // Initialize data + // ******************************************************************** + if ((m == 3) && (n == 3)) { + if constexpr (Kokkos::ArithTraits::is_complex) { + h_A(0, 0).real() = 12.; + h_A(0, 1).real() = -51.; + h_A(0, 2).real() = 4.; + + h_A(1, 0).real() = 6.; + h_A(1, 1).real() = 167.; + h_A(1, 2).real() = -68.; + + h_A(2, 0).real() = -4.; + h_A(2, 1).real() = 24.; + h_A(2, 2).real() = -41.; + + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + h_A(i, j).imag() = 0.; + } + } + } else { + h_A(0, 0) = 12.; + h_A(0, 1) = -51.; + h_A(0, 2) = 4.; + + h_A(1, 0) = 6.; + h_A(1, 1) = 167.; + h_A(1, 2) = -68.; + + h_A(2, 0) = -4.; + h_A(2, 1) = 24.; + h_A(2, 2) = -41.; + } + + Kokkos::deep_copy(A, h_A); + } else { + Kokkos::fill_random(A, rand_pool, + Kokkos::rand, + ScalarA>::max()); + Kokkos::deep_copy(h_A, A); + } + + Kokkos::deep_copy(h_Aorig, h_A); + +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl; + } + } +#endif + + Kokkos::fence(); + + // ******************************************************************** + // Perform the QR factorization + // ******************************************************************** + try { + execution_space space{}; + KokkosLapack::geqrf(space, A, Tau, Info); + } catch (const std::runtime_error& e) { + std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" + << std::endl; + FAIL(); + return; + } + + Kokkos::fence(); + + Kokkos::deep_copy(h_info, Info); + EXPECT_EQ(h_info[0], 0) << "Failed geqrf() test: Info[0] = " << h_info[0]; + + // ******************************************************************** + // Get the results + // ******************************************************************** + Kokkos::deep_copy(h_A, A); + Kokkos::deep_copy(h_tau, Tau); + + typename Kokkos::ArithTraits< + typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8); + if constexpr (std::is_same_v< + typename Kokkos::ArithTraits< + typename ViewTypeA::non_const_value_type>::mag_type, + float>) { + absTol = 5.e-5; + } + +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG + std::cout << "info[0] = " << h_info[0] << std::endl; + for (int i(0); i < minMN; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "Aoutput(" << i << "," << j << ") = " << std::setprecision(16) << h_A(i,j) << std::endl; + } + } + for (int i(0); i < minMN; ++i) { + std::cout << "tau(" << i << ") = " << h_tau[i] << std::setprecision(16) << std::endl; + } + std::cout << "absTol = " << absTol << std::endl; +#endif + + // ******************************************************************** + // Check outputs h_A and h_tau + // ******************************************************************** + if ((m == 3) && (n == 3)) { + std::vector> refMatrix(m); + for (int i(0); i < m; ++i) { + refMatrix[i].resize(n, Kokkos::ArithTraits::zero()); + } + + std::vector refTau(m, Kokkos::ArithTraits::zero()); + + if constexpr (Kokkos::ArithTraits::is_complex) { + refMatrix[0][0].real() = -14.; + refMatrix[0][1].real() = -21.; + refMatrix[0][2].real() = 14.; + + refMatrix[1][0].real() = 0.2307692307692308; + refMatrix[1][1].real() = -175.; + refMatrix[1][2].real() = 70.; + + refMatrix[2][0].real() = -0.1538461538461539; + refMatrix[2][1].real() = 1. / 18.; + refMatrix[2][2].real() = -35.; + + refTau[0].real() = 1.857142857142857; + refTau[1].real() = 1.993846153846154; + refTau[2].real() = 0.; + } else { + refMatrix[0][0] = -14.; + refMatrix[0][1] = -21.; + refMatrix[0][2] = 14.; + + refMatrix[1][0] = 0.2307692307692308; + refMatrix[1][1] = -175.; + refMatrix[1][2] = 70.; + + refMatrix[2][0] = -0.1538461538461539; + refMatrix[2][1] = 1. / 18.; + refMatrix[2][2] = -35.; + + refTau[0] = 1.857142857142857; + refTau[1] = 1.993846153846154; + refTau[2] = 0.; + } + + { + bool test_flag_A = true; + for (int i(0); (i < m) && test_flag_A; ++i) { + for (int j(0); (j < n) && test_flag_A; ++j) { + if (ats::abs(h_A(i, j) - refMatrix[i][j]) > absTol) { + std::cout << "h_Aoutput checking" + << ", m = " << m << ", n = " << n << ", i = " << i + << ", j = " << j + << ", h_Aoutput(i,j) = " << std::setprecision(16) + << h_A(i, j) + << ", refMatrix(i,j) = " << std::setprecision(16) + << refMatrix[i][j] + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_A(i, j) - refMatrix[i][j]) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_A = false; + } + } + } + ASSERT_EQ(test_flag_A, true); + } + + { + bool test_flag_tau = true; + for (int i(0); (i < m) && test_flag_tau; ++i) { + if (ats::abs(h_tau[i] - refTau[i]) > absTol) { + std::cout << "tau checking" + << ", m = " << m << ", n = " << n << ", i = " << i + << ", h_tau(i,j) = " << std::setprecision(16) << h_tau[i] + << ", refTau(i,j) = " << std::setprecision(16) << refTau[i] + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_tau[i] - refTau[i]) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_tau = false; + } + } + ASSERT_EQ(test_flag_tau, true); + } + } + + // ******************************************************************** + // Compute Q, R, and QR + // ******************************************************************** + ViewTypeA Q("Q", m, m); + ViewTypeA R("R", m, n); + ViewTypeA QR("QR", m, n); + + typename ViewTypeA::HostMirror h_Q = Kokkos::create_mirror_view(Q); + typename ViewTypeA::HostMirror h_R = Kokkos::create_mirror_view(R); + typename ViewTypeA::HostMirror h_QR = Kokkos::create_mirror_view(QR); + + getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); + +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + for (int j(0); j < m; ++j) { + std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; + } + } + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl; + } + } + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl; + } + } +#endif + + // ******************************************************************** + // Check Q, R, and QR + // ******************************************************************** + if ((m == 3) && (n == 3)) { + std::vector> refQ(m); + for (int i(0); i < m; ++i) { + refQ[i].resize(n, Kokkos::ArithTraits::zero()); + } + + std::vector> refR(m); + for (int i(0); i < m; ++i) { + refR[i].resize(n, Kokkos::ArithTraits::zero()); + } + +#if 0 + Q = [ -6/7 69/175 58/175 + -3/7 -158/175 -6/175 + 2/7 -6/35 33/35 ] + + R = [ -14 -21 14 + 0 -175 70 + 0 0 -35 ] +#endif + + if constexpr (Kokkos::ArithTraits::is_complex) { + refQ[0][0].real() = -6. / 7.; + refQ[0][1].real() = 69. / 175.; + refQ[0][2].real() = 58. / 175.; + + refQ[1][0].real() = -3. / 7.; + refQ[1][1].real() = -158. / 175.; + refQ[1][2].real() = -6. / 175.; + + refQ[2][0].real() = 2. / 7.; + refQ[2][1].real() = -6. / 35.; + refQ[2][2].real() = 33. / 35.; + + refR[0][0].real() = -14.; + refR[0][1].real() = -21.; + refR[0][2].real() = 14.; + + refR[1][1].real() = -175.; + refR[1][2].real() = 70.; + + refR[2][2].real() = -35.; + } else { + refQ[0][0] = -6. / 7.; + refQ[0][1] = 69. / 175.; + refQ[0][2] = 58. / 175.; + + refQ[1][0] = -3. / 7.; + refQ[1][1] = -158. / 175.; + refQ[1][2] = -6. / 175.; + + refQ[2][0] = 2. / 7.; + refQ[2][1] = -6. / 35.; + refQ[2][2] = 33. / 35.; + + refR[0][0] = -14.; + refR[0][1] = -21.; + refR[0][2] = 14.; + + refR[1][1] = -175.; + refR[1][2] = 70.; + + refR[2][2] = -35.; + } + + { + bool test_flag_Q = true; + for (int i(0); (i < m) && test_flag_Q; ++i) { + for (int j(0); (j < n) && test_flag_Q; ++j) { + if (ats::abs(h_Q(i, j) - refQ[i][j]) > absTol) { + std::cout << "Q checking" + << ", m = " << m << ", n = " << n << ", i = " << i + << ", j = " << j + << ", h_Q(i,j) = " << std::setprecision(16) << h_Q(i, j) + << ", refQ(i,j) = " << std::setprecision(16) << refQ[i][j] + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_Q(i, j) - refQ[i][j]) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_Q = false; + } + } + } + ASSERT_EQ(test_flag_Q, true); + } + + { + bool test_flag_R = true; + for (int i(0); (i < m) && test_flag_R; ++i) { + for (int j(0); (j < n) && test_flag_R; ++j) { + if (ats::abs(h_R(i, j) - refR[i][j]) > absTol) { + std::cout << "R checking" + << ", m = " << m << ", n = " << n << ", i = " << i + << ", j = " << j + << ", h_R(i,j) = " << std::setprecision(16) << h_R(i, j) + << ", refR(i,j) = " << std::setprecision(16) << refR[i][j] + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_R(i, j) - refR[i][j]) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_R = false; + } + } + } + ASSERT_EQ(test_flag_R, true); + } + } + + // ******************************************************************** + // Check that A = QR + // ******************************************************************** + { + bool test_flag_QR = true; + for (int i(0); (i < m) && test_flag_QR; ++i) { + for (int j(0); (j < n) && test_flag_QR; ++j) { + if (ats::abs(h_QR(i, j) - h_Aorig(i, j)) > absTol) { + std::cout << "QR checking" + << ", m = " << m << ", n = " << n << ", i = " << i + << ", j = " << j + << ", h_Aorig(i,j) = " << std::setprecision(16) + << h_Aorig(i, j) + << ", h_QR(i,j) = " << std::setprecision(16) << h_QR(i, j) + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_QR(i, j) - h_Aorig(i, j)) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_QR = false; + } + } + } + ASSERT_EQ(test_flag_QR, true); + } +} + +} // namespace Test + +template +void test_geqrf() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_ll = Kokkos::View; + using view_type_tau_ll = Kokkos::View; + + Test::impl_test_geqrf(1, 1); + Test::impl_test_geqrf(2, 1); + Test::impl_test_geqrf(2, 2); + Test::impl_test_geqrf(3, 1); + Test::impl_test_geqrf(3, 2); + Test::impl_test_geqrf(3, 3); + + Test::impl_test_geqrf(100, 100); + + Test::impl_test_geqrf(100, 70); + + Test::impl_test_geqrf(70, 100); +#endif +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float"); + test_geqrf(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_double"); + test_geqrf(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_complex_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float"); + test_geqrf, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_complex_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double"); + test_geqrf, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#endif // CUDA+CUSOLVER or HIP+ROCSOLVER or LAPACK+HOST