From 1be165936bbc7cf839cd9f56246919e0bb66203b Mon Sep 17 00:00:00 2001 From: y Date: Fri, 28 Jul 2023 15:15:32 -0500 Subject: [PATCH 01/38] Transition from numpy.distutils to scikit-build In preparation to switch from using deprecated ICC to its successor ICX, changed build system from deprecated numpy.distutils to scikit-build. Renamed files: loops_intel -> mkl_umath_loops Bumped up the version of the package from 0.1.1 to 0.1.2 Co-authored-by: Oleksandr Pavlyk Co-authored-by: Andres Guzman-Ballen --- .gitignore | 96 +++++ CMakeLists.txt | 124 +++++++ build.sh | 18 + icpx_for_conda.cfg | 1 + mkl_umath/_version.py | 2 +- mkl_umath/generate_umath.py | 16 +- mkl_umath/src/{patch.pyx => _patch.pyx} | 0 mkl_umath/src/fast_loop_macros.h | 4 + mkl_umath/src/loops_intel.h.src | 306 ---------------- ...oops_intel.c.src => mkl_umath_loops.c.src} | 327 ++++++++---------- mkl_umath/src/mkl_umath_loops.h.src | 306 ++++++++++++++++ mkl_umath/tests/test_basic.py | 6 +- setup.py | 149 +++++--- mkl_umath/setup.py => template | 0 14 files changed, 812 insertions(+), 543 deletions(-) create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 build.sh create mode 100644 icpx_for_conda.cfg rename mkl_umath/src/{patch.pyx => _patch.pyx} (100%) delete mode 100644 mkl_umath/src/loops_intel.h.src rename mkl_umath/src/{loops_intel.c.src => mkl_umath_loops.c.src} (88%) create mode 100644 mkl_umath/src/mkl_umath_loops.h.src rename mkl_umath/setup.py => template (100%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7cc71d7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,96 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions and binary files +*.o +*.so +*.so.* +*.exe +*.lib +*.dll + +# CMake build and local install directory +build +_skbuild +build_cmake +install + +# Code project files +.vscode + +# Eclipse project files +.project +.pydevproject + +# Emacs temp files +*~ + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg +dpctl_conda_pkg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ +junit.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# pyenv python configuration file +.python-version + +_cmake_test_compile + +# generated numpy files +mkl_umath/src/__umath_generated.c +mkl_umath/src/mkl_umath_loops.c +mkl_umath/src/mkl_umath_loops.h +mkl_umath/src/_patch.c + +# moved cmake scripts +dpctl/resources/cmake diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..87adc4c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,124 @@ +cmake_minimum_required(VERSION 3.21...3.25 FATAL_ERROR) + +if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.24") + cmake_policy(SET CMP0135 NEW) +endif() + +project(mkl_umath + LANGUAGES C + DESCRIPTION "mkl_umath module" +) + +find_package(Python COMPONENTS Interpreter Development REQUIRED) +find_package(NumPy REQUIRED) +find_package(PythonExtensions REQUIRED) + +set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"") +find_package(Cython REQUIRED) + +set(MKL_ARCH intel64) +set(MKL_LINK sdl) +set(MKL_THREADING intel_thread) +set(MKL_INTERFACE ilp64) +# MKL_ARCH: None, set to ` intel64` by default +# MKL_ROOT /localdisk/work/aguzmanb/Development/miniconda3.py310/envs/numpy_umath_prefix.v5 +# MKL_DPCPP_LINK: None, set to ` dynamic` by default +# MKL_LINK: None, set to ` dynamic` by default +# MKL_DPCPP_INTERFACE_FULL: None, set to ` intel_ilp64` by default +# MKL_INTERFACE_FULL: None, set to ` intel_ilp64` by default +# MKL_DPCPP_THREADING: None, set to ` tbb_thread` by default +# MKL_THREADING: None, set to ` intel_thread` by default +find_package(MKL REQUIRED) + +if(WIN32) + string(CONCAT WARNING_FLAGS + "-Wall " + "-Wextra " + "-Winit-self " + "-Wunused-function " + "-Wuninitialized " + "-Wmissing-declarations " + "-Wstrict-prototypes " + "-Wno-unused-parameter " + ) + string(CONCAT SDL_FLAGS + "/GS " + "/DynamicBase " + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS}") + set(CMAKE_C_FLAGS_DEBUG + "${CMAKE_C_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG" + ) + set(MKL_UMATH_LDFLAGS "/NXCompat;/DynamicBase") +elseif(UNIX) + string(CONCAT WARNING_FLAGS + "-Wall " + "-Wextra " + "-Winit-self " + "-Wunused-function " + "-Wuninitialized " + "-Wmissing-declarations " + "-Wstrict-prototypes " + "-Wno-unused-parameter " + "-fdiagnostics-color=auto " + ) + string(CONCAT SDL_FLAGS + "-fstack-protector " + "-fstack-protector-all " + "-fpic " + "-fPIC " + "-D_FORTIFY_SOURCE=2 " + "-Wformat " + "-Wformat-security " +# "-fno-strict-overflow " # no-strict-overflow is implied by -fwrapv + "-fno-delete-null-pointer-checks " + "-fwrapv " + ) + string(CONCAT CFLAGS + "${WARNING_FLAGS}" + "${SDL_FLAGS}" + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 ${CFLAGS}") + set(CMAKE_C_FLAGS_DEBUG + "${CMAKE_C_FLAGS_DEBUG} ${CFLAGS} -O0 -g1 -DDEBUG" + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-incompatible-function-pointer-types ${CFLAGS}") + set(MKL_UMATH_LDFLAGS "-z,noexecstack,-z,relro,-z,now") +else() + message(FATAL_ERROR "Unsupported system.") +endif() + +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +# set_property(GLOBAL PROPERTY GLOBAL_DEPENDS_DEBUG_MODE 1) +set(_linker_options "LINKER:${MKL_UMATH_LDFLAGS}") + +set(_trgt mkl_umath_loops) +add_library(${_trgt} SHARED 
"mkl_umath/src/mkl_umath_loops.c") +set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) +target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR} ${MKL_INCLUDE_DIR}) +target_link_libraries(${_trgt} PRIVATE mkl_rt) +target_link_options(${_trgt} PRIVATE ${_linker_options}) +install(TARGETS ${_trgt} LIBRARY DESTINATION mkl_umath) + +add_library(_ufuncs MODULE "mkl_umath/src/ufuncsmodule.c" "mkl_umath/src/__umath_generated.c") +target_include_directories(_ufuncs PRIVATE "mkl_umath/src" ${NumPy_INCLUDE_DIR} ${MKL_INCLUDE_DIR}) +target_compile_definitions(_ufuncs PUBLIC NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) +target_link_options(_ufuncs PRIVATE ${_linker_options}) +target_link_libraries(_ufuncs mkl_umath_loops) +python_extension_module(_ufuncs) +if (UNIX) + set_target_properties(_ufuncs PROPERTIES INSTALL_RPATH "$ORIGIN") +endif() +install(TARGETS _ufuncs LIBRARY DESTINATION mkl_umath) + +add_cython_target(_patch "mkl_umath/src/_patch.pyx" C OUTPUT_VAR _generated_src) +add_library(_patch MODULE ${_generated_src}) +target_include_directories(_patch PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) +target_compile_definitions(_patch PUBLIC NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) +target_link_libraries(_patch mkl_umath_loops) +python_extension_module(_patch) +if (UNIX) + set_target_properties(_patch PROPERTIES INSTALL_RPATH "$ORIGIN") +endif() +install(TARGETS _patch LIBRARY DESTINATION mkl_umath) diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..bd34337 --- /dev/null +++ b/build.sh @@ -0,0 +1,18 @@ +# This is necessary to help DPC++ find Intel libraries such as SVML, IRNG, etc in build prefix +export BUILD_PREFIX=$CONDA_PREFIX +export HOST=x86_64-conda-linux-gnu +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${BUILD_PREFIX}/lib" + +# Intel LLVM must cooperate with compiler and sysroot from conda +echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg +export ICPXCFG="$(pwd)/icpx_for_conda.cfg" +export ICXCFG="$(pwd)/icpx_for_conda.cfg" + +# if [ -e "_skbuild" ]; then +# python setup.py clean --all +# fi + +export CMAKE_GENERATOR="Ninja" +SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" +echo "python setup.py install ${SKBUILD_ARGS}" +python setup.py install ${SKBUILD_ARGS} diff --git a/icpx_for_conda.cfg b/icpx_for_conda.cfg new file mode 100644 index 0000000..d828bd2 --- /dev/null +++ b/icpx_for_conda.cfg @@ -0,0 +1 @@ +--gcc-toolchain=/localdisk/work/aguzmanb/Development/miniconda3.py310/envs/numpy_umath_prefix.v6 --sysroot=/localdisk/work/aguzmanb/Development/miniconda3.py310/envs/numpy_umath_prefix.v6/x86_64-conda-linux-gnu/sysroot -target x86_64-conda-linux-gnu diff --git a/mkl_umath/_version.py b/mkl_umath/_version.py index df9144c..10939f0 100644 --- a/mkl_umath/_version.py +++ b/mkl_umath/_version.py @@ -1 +1 @@ -__version__ = '0.1.1' +__version__ = '0.1.2' diff --git a/mkl_umath/generate_umath.py b/mkl_umath/generate_umath.py index 7ff39b2..cc2034f 100644 --- a/mkl_umath/generate_umath.py +++ b/mkl_umath/generate_umath.py @@ -343,12 +343,6 @@ def english_upper(s): None, TD(inexactvec + cmplxvec), ), -'floor_divide': - Ufunc(2, 1, None, - docstrings.get('numpy.core.umath.floor_divide'), - None, - TD(inexactvec + cmplxvec), - ), 'true_divide': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.true_divide'), @@ -797,16 +791,16 @@ def make_arrays(funcdict): tname = 
english_upper(chartoname[t.type]) datalist.append('(void *)NULL') funclist.append( - '%s_%s_%s_%s' % (tname, t.in_, t.out, name)) + 'mkl_umath_%s_%s_%s_%s' % (tname, t.in_, t.out, name)) elif isinstance(t.func_data, FuncNameSuffix): datalist.append('(void *)NULL') tname = english_upper(chartoname[t.type]) funclist.append( - '%s_%s_%s' % (tname, name, t.func_data.suffix)) + 'mkl_umath_%s_%s_%s' % (tname, name, t.func_data.suffix)) elif t.func_data is None: datalist.append('(void *)NULL') tname = english_upper(chartoname[t.type]) - funclist.append('%s_%s' % (tname, name)) + funclist.append('mkl_umath_%s_%s' % (tname, name)) if t.simd is not None: for vt in t.simd: code2list.append(textwrap.dedent("""\ @@ -936,8 +930,10 @@ def make_code(funcdict, filename): Please make changes to the code generator program (%s) **/ #include "Python.h" + #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + #include "numpy/arrayobject.h" #include "numpy/ufuncobject.h" - #include "loops_intel.h" + #include "mkl_umath_loops.h" %s static int diff --git a/mkl_umath/src/patch.pyx b/mkl_umath/src/_patch.pyx similarity index 100% rename from mkl_umath/src/patch.pyx rename to mkl_umath/src/_patch.pyx diff --git a/mkl_umath/src/fast_loop_macros.h b/mkl_umath/src/fast_loop_macros.h index 50f9d41..d26174c 100644 --- a/mkl_umath/src/fast_loop_macros.h +++ b/mkl_umath/src/fast_loop_macros.h @@ -41,6 +41,10 @@ #define NPY_PRAGMA_VECTOR _Pragma("vector") #define NPY_PRAGMA_NOVECTOR _Pragma("novector") #define NPY_ASSUME_ALIGNED(p, b) __assume_aligned((p), (b)); +#elif defined(__clang__) +#define NPY_PRAGMA_VECTOR _Pragma("clang loop vectorize(enable)") +#define NPY_PRAGMA_NOVECTOR _Pragma("clang loop vectorize(disable)") +#define NPY_ASSUME_ALIGNED(p, b) #else #define NPY_PRAGMA_VECTOR _Pragma("GCC ivdep") #define NPY_PRAGMA_NOVECTOR diff --git a/mkl_umath/src/loops_intel.h.src b/mkl_umath/src/loops_intel.h.src deleted file mode 100644 index c45bab4..0000000 --- a/mkl_umath/src/loops_intel.h.src +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Copyright (c) 2019-2021, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _MKL_UMATH_LOOPS_H_ -#define _MKL_UMATH_LOOPS_H_ - -#include "numpy/ndarraytypes.h" - -#include - -/**begin repeat - * Float types - * #TYPE = FLOAT, DOUBLE# - */ - -NPY_NO_EXPORT void -@TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_invsqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_exp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_exp2(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_expm1(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_erf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_log(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_log2(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_log10(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_log1p(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_cos(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_sin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_tan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arccos(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arcsin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arctan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_cosh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_sinh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_tanh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arccosh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arcsinh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arctanh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_fabs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_floor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_ceil(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void 
-@TYPE@_rint(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_trunc(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_cbrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -/**begin repeat1 - * Arithmetic - * # kind = add, subtract, multiply, divide# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -/**begin repeat1 - * Arithmetic - * # kind = equal, not_equal, less, less_equal, greater, greater_equal, - * logical_and, logical_or# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -/**begin repeat1 - * #kind = isnan, isinf, isfinite, signbit# - **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - - -NPY_NO_EXPORT void -@TYPE@_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -/**begin repeat1 - * #kind = maximum, minimum, fmax, fmin# - **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_modf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_frexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_ldexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -#define 
@TYPE@_true_divide @TYPE@_divide - -/**end repeat**/ - -/* - ***************************************************************************** - ** COMPLEX LOOPS ** - ***************************************************************************** - */ - -#define CGE(xr,xi,yr,yi) (xr > yr || (xr == yr && xi >= yi)); -#define CLE(xr,xi,yr,yi) (xr < yr || (xr == yr && xi <= yi)); -#define CGT(xr,xi,yr,yi) (xr > yr || (xr == yr && xi > yi)); -#define CLT(xr,xi,yr,yi) (xr < yr || (xr == yr && xi < yi)); -#define CEQ(xr,xi,yr,yi) (xr == yr && xi == yi); -#define CNE(xr,xi,yr,yi) (xr != yr || xi != yi); - -/**begin repeat - * complex types - * #TYPE = CFLOAT, CDOUBLE# - */ - -/**begin repeat1 - * arithmetic - * #kind = add, subtract# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - - -/**begin repeat1 - * arithmetic - * #kind = greater, greater_equal, less, less_equal, equal, - not_equal, logical_and, logical_or, logical_xor, logical_not, - isnan, isinf, isfinite# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -/**begin repeat1 - * arithmetic - * #kind = maximum, minimum, fmax, fmin# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -#define @TYPE@_true_divide @TYPE@_divide - -/**end repeat**/ - -#undef CGE -#undef CLE -#undef CGT -#undef CLT -#undef CEQ -#undef CNE - -#endif diff --git a/mkl_umath/src/loops_intel.c.src b/mkl_umath/src/mkl_umath_loops.c.src similarity index 88% rename from mkl_umath/src/loops_intel.c.src rename to mkl_umath/src/mkl_umath_loops.c.src index 0a199dc..b5cbbaf 100644 --- a/mkl_umath/src/loops_intel.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -29,7 +29,6 @@ #include "mkl.h" #include #include -#include "mathimf.h" #include "Python.h" #define NPY_NO_DEPRECATED_API NPY_API_VERSION @@ -40,7 +39,7 @@ #include "numpy/ufuncobject.h" #include "numpy/npy_math.h" #include "blocking_utils.h" -#include "loops_intel.h" +#include "mkl_umath_loops.h" /* Adapated from NumPy's source code. 
* https://github.com/numpy/numpy/blob/main/LICENSE.txt */ @@ -223,8 +222,8 @@ divmod@c@(@type@ a, @type@ b, @type@ *modulus) * #scalarf = sqrtf, sqrt# */ -NPY_NO_EXPORT void -@TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -251,8 +250,8 @@ NPY_NO_EXPORT void * #scalarf = (1.0f)/sqrtf, (1.0)/sqrt# */ -NPY_NO_EXPORT void -@TYPE@_invsqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_invsqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -280,8 +279,8 @@ NPY_NO_EXPORT void * #scalarf = expf, exp# */ -NPY_NO_EXPORT void -@TYPE@_exp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { int ignore_fpstatus = 0; @@ -318,8 +317,8 @@ NPY_NO_EXPORT void */ /* TODO: Use VML */ -NPY_NO_EXPORT void -@TYPE@_exp2(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP_DISPATCH( DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) @@ -339,8 +338,8 @@ NPY_NO_EXPORT void * #scalarf = expm1f, expm1# */ -NPY_NO_EXPORT void -@TYPE@_expm1(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_expm1(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -367,8 +366,8 @@ NPY_NO_EXPORT void * #scalarf = erff, erf# */ -NPY_NO_EXPORT void -@TYPE@_erf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_erf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -395,8 +394,8 @@ NPY_NO_EXPORT void * #scalarf = logf, log# */ -NPY_NO_EXPORT void -@TYPE@_log(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -424,8 +423,8 @@ NPY_NO_EXPORT void */ /* TODO: Use VML */ -NPY_NO_EXPORT void -@TYPE@_log2(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP_DISPATCH( DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) @@ -445,8 +444,8 @@ NPY_NO_EXPORT void * #scalarf = log10f, log10# */ -NPY_NO_EXPORT void -@TYPE@_log10(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_log10(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -473,8 +472,8 @@ NPY_NO_EXPORT void * #scalarf = log1pf, log1p# */ -NPY_NO_EXPORT void 
-@TYPE@_log1p(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_log1p(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -501,8 +500,8 @@ NPY_NO_EXPORT void * #scalarf = cosf, cos# */ -NPY_NO_EXPORT void -@TYPE@_cos(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_cos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -529,8 +528,8 @@ NPY_NO_EXPORT void * #scalarf = sinf, sin# */ -NPY_NO_EXPORT void -@TYPE@_sin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -557,8 +556,8 @@ NPY_NO_EXPORT void * #scalarf = tanf, tan# */ -NPY_NO_EXPORT void -@TYPE@_tan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_tan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -585,8 +584,8 @@ NPY_NO_EXPORT void * #scalarf = acosf, acos# */ -NPY_NO_EXPORT void -@TYPE@_arccos(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arccos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -613,8 +612,8 @@ NPY_NO_EXPORT void * #scalarf = asinf, asin# */ -NPY_NO_EXPORT void -@TYPE@_arcsin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arcsin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -641,8 +640,8 @@ NPY_NO_EXPORT void * #scalarf = atanf, atan# */ -NPY_NO_EXPORT void -@TYPE@_arctan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arctan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -669,8 +668,8 @@ NPY_NO_EXPORT void * #scalarf = coshf, cosh# */ -NPY_NO_EXPORT void -@TYPE@_cosh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_cosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -697,8 +696,8 @@ NPY_NO_EXPORT void * #scalarf = sinhf, sinh# */ -NPY_NO_EXPORT void -@TYPE@_sinh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -725,8 +724,8 @@ NPY_NO_EXPORT void * #scalarf = tanhf, tanh# */ -NPY_NO_EXPORT void -@TYPE@_tanh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_tanh(char **args, const 
npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -753,8 +752,8 @@ NPY_NO_EXPORT void * #scalarf = acoshf, acosh# */ -NPY_NO_EXPORT void -@TYPE@_arccosh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arccosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -781,8 +780,8 @@ NPY_NO_EXPORT void * #scalarf = asinhf, asinh# */ -NPY_NO_EXPORT void -@TYPE@_arcsinh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arcsinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -809,8 +808,8 @@ NPY_NO_EXPORT void * #scalarf = atanhf, atanh# */ -NPY_NO_EXPORT void -@TYPE@_arctanh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arctanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -837,8 +836,8 @@ NPY_NO_EXPORT void * #scalarf = fabsf, fabs# */ -NPY_NO_EXPORT void -@TYPE@_fabs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP_DISPATCH( DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) @@ -858,8 +857,8 @@ NPY_NO_EXPORT void * #scalarf = floorf, floor# */ -NPY_NO_EXPORT void -@TYPE@_floor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_floor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(steps[0] == sizeof(@type@) && steps[1] == sizeof(@type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -886,8 +885,8 @@ NPY_NO_EXPORT void * #scalarf = ceilf, ceil# */ -NPY_NO_EXPORT void -@TYPE@_ceil(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_ceil(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -914,8 +913,8 @@ NPY_NO_EXPORT void * #scalarf = rintf, rint# */ -NPY_NO_EXPORT void -@TYPE@_rint(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_rint(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(steps[0] == sizeof(@type@) && steps[1] == sizeof(@type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -942,8 +941,8 @@ NPY_NO_EXPORT void * #scalarf = truncf, trunc# */ -NPY_NO_EXPORT void -@TYPE@_trunc(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_trunc(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -970,8 +969,8 @@ NPY_NO_EXPORT void * #scalarf = cbrtf, cbrt# */ -NPY_NO_EXPORT void -@TYPE@_cbrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_cbrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void 
*NPY_UNUSED(func)) { if(IS_UNARY_CONT(@type@, @type@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && @@ -1094,8 +1093,8 @@ pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride) * # PW = 1# * # VML = Add# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_BINARY_CONT(@type@, @type@)) { #if @SUPPORTED_BY_VML@ @@ -1262,8 +1261,8 @@ NPY_NO_EXPORT void * # PW = 0# * # VML = Sub# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_BINARY_CONT(@type@, @type@)) { #if @SUPPORTED_BY_VML@ @@ -1430,8 +1429,8 @@ NPY_NO_EXPORT void * # PW = 0# * # VML = Mul# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_BINARY_CONT(@type@, @type@)) { #if @SUPPORTED_BY_VML@ @@ -1598,8 +1597,8 @@ NPY_NO_EXPORT void * # PW = 0# * # VML = Div# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_BINARY_CONT(@type@, @type@)) { #if @SUPPORTED_BY_VML@ @@ -1750,8 +1749,8 @@ NPY_NO_EXPORT void * logical_and, logical_or# * #OP = ==, !=, <, <=, >, >=, &&, ||# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { { BINARY_LOOP { @@ -1763,8 +1762,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_logical_xor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const int t1 = !!*(@type@ *)ip1; @@ -1773,8 +1772,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_logical_not(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1786,8 +1785,8 @@ NPY_NO_EXPORT void * #kind = isnan, isinf, isfinite, signbit# * #func = isnan, isinf, isfinite, signbit# **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { { UNARY_LOOP { @@ -1799,8 +1798,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1808,8 +1807,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_copysign(char 
**args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1818,8 +1817,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1832,8 +1831,8 @@ NPY_NO_EXPORT void * #kind = maximum, minimum# * #OP = >=, <=# **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* */ if (IS_BINARY_REDUCE) { @@ -1863,8 +1862,8 @@ NPY_NO_EXPORT void * #kind = fmax, fmin# * #OP = >=, <=# **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* */ if (IS_BINARY_REDUCE) { @@ -1887,19 +1886,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) -{ - BINARY_LOOP { - const @type@ in1 = *(@type@ *)ip1; - const @type@ in2 = *(@type@ *)ip2; - @type@ mod; - *((@type@ *)op1) = divmod@c@(in1, in2, &mod); - } -} - -NPY_NO_EXPORT void -@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1908,8 +1896,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_divmod(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP_TWO_OUT { const @type@ in1 = *(@type@ *)ip1; @@ -1918,8 +1906,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { #if @SUPPORTED_BY_VML@ if(IS_UNARY_CONT(@type@, @type@) && @@ -1937,8 +1925,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { #if @SUPPORTED_BY_VML@ if(IS_UNARY_CONT(@type@, @type@) && @@ -1956,16 +1944,16 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { OUTPUT_LOOP { *((@type@ *)op1) = 1; } } -NPY_NO_EXPORT void -@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1973,8 +1961,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, 
void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { #if @SUPPORTED_BY_VML@ if(IS_UNARY_CONT(@type@, @type@) && @@ -1995,8 +1983,8 @@ NPY_NO_EXPORT void feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */ } -NPY_NO_EXPORT void -@TYPE@_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_negative(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { { UNARY_LOOP { @@ -2006,8 +1994,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_positive(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -2015,8 +2003,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* Sign of nan is nan */ UNARY_LOOP { @@ -2025,8 +2013,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_modf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP_TWO_OUT { const @type@ in1 = *(@type@ *)ip1; @@ -2034,8 +2022,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_frexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_frexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP_TWO_OUT { const @type@ in1 = *(@type@ *)ip1; @@ -2043,8 +2031,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_ldexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_ldexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -2053,8 +2041,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* * Additional loop to handle npy_long integer inputs (cf. #866, #1633). 
@@ -2083,7 +2071,7 @@ NPY_NO_EXPORT void } } -#define @TYPE@_true_divide @TYPE@_divide +#define mkl_umath_@TYPE@_true_divide mkl_umath_@TYPE@_divide /**end repeat**/ @@ -2200,8 +2188,8 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n, * #OP = +, -# * #PW = 1, 0# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if (IS_BINARY_REDUCE && @PW@) { npy_intp n = dimensions[0]; @@ -2227,8 +2215,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2240,8 +2228,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2272,33 +2260,12 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) -{ - BINARY_LOOP { - const @ftype@ in1r = ((@ftype@ *)ip1)[0]; - const @ftype@ in1i = ((@ftype@ *)ip1)[1]; - const @ftype@ in2r = ((@ftype@ *)ip2)[0]; - const @ftype@ in2i = ((@ftype@ *)ip2)[1]; - if (fabs@c@(in2r) >= fabs@c@(in2i)) { - const @ftype@ rat = in2i/in2r; - ((@ftype@ *)op1)[0] = floor@c@((in1r + in1i*rat)/(in2r + in2i*rat)); - ((@ftype@ *)op1)[1] = 0; - } - else { - const @ftype@ rat = in2r/in2i; - ((@ftype@ *)op1)[0] = floor@c@((in1r*rat + in1i)/(in2i + in2r*rat)); - ((@ftype@ *)op1)[1] = 0; - } - } -} - /**begin repeat1 * #kind= greater, greater_equal, less, less_equal, equal, not_equal# * #OP = CGT, CGE, CLT, CLE, CEQ, CNE# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2315,8 +2282,8 @@ NPY_NO_EXPORT void #OP1 = ||, ||# #OP2 = &&, ||# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2328,8 +2295,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_logical_xor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2342,8 +2309,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_logical_not(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2357,8 +2324,8 @@ NPY_NO_EXPORT void * #func = isnan, isinf, isfinite# * #OP = ||, ||, &&# **/ -NPY_NO_EXPORT void 
-@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2369,8 +2336,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2380,8 +2347,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2400,8 +2367,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { OUTPUT_LOOP { ((@ftype@ *)op1)[0] = 1; @@ -2409,8 +2376,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { +void +mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; const @ftype@ in1i = ((@ftype@ *)ip1)[1]; @@ -2419,8 +2386,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { int ignore_fpstatus = 0; @@ -2449,8 +2416,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@__arg(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2459,8 +2426,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* fixme: sign of nan is currently 0 */ UNARY_LOOP { @@ -2478,8 +2445,8 @@ NPY_NO_EXPORT void * #kind = maximum, minimum# * #OP = CGE, CLE# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2501,8 +2468,8 @@ NPY_NO_EXPORT void * #kind = fmax, fmin# * #OP = CGE, CLE# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2522,7 +2489,7 @@ NPY_NO_EXPORT void } /**end repeat1**/ -#define @TYPE@_true_divide @TYPE@_divide +#define mkl_umath_@TYPE@_true_divide mkl_umath_@TYPE@_divide /**end repeat**/ diff 
--git a/mkl_umath/src/mkl_umath_loops.h.src b/mkl_umath/src/mkl_umath_loops.h.src new file mode 100644 index 0000000..70a7e94 --- /dev/null +++ b/mkl_umath/src/mkl_umath_loops.h.src @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2019-2021, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MKL_UMATH_LOOPS_H_ +#define _MKL_UMATH_LOOPS_H_ + +#include "numpy/ndarraytypes.h" + +#include + +/**begin repeat + * Float types + * #TYPE = FLOAT, DOUBLE# + */ + +extern void +mkl_umath_@TYPE@_sqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_invsqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_expm1(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_erf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_log10(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_log1p(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_cos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_sin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_tan(char **args, const npy_intp *dimensions, const 
npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_arccos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_arcsin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_arctan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_cosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_sinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_tanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_arccosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_arcsinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_arctanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_floor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_ceil(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_rint(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_trunc(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_cbrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat1 + * Arithmetic + * # kind = add, subtract, multiply, divide# + */ +extern void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +/**begin repeat1 + * Arithmetic + * # kind = equal, not_equal, less, less_equal, greater, greater_equal, + * logical_and, logical_or# + */ +extern void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +extern void +mkl_umath_@TYPE@_logical_xor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_logical_not(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat1 + * #kind = isnan, isinf, isfinite, signbit# + **/ +extern void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +extern void +mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + + +extern void +mkl_umath_@TYPE@_copysign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat1 + * #kind = maximum, minimum, fmax, fmin# + **/ +extern void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void 
*NPY_UNUSED(func)); +/**end repeat1**/ + +extern void +mkl_umath_@TYPE@_floor_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_divmod(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +extern void +mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +extern void +mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_negative(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_positive(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_frexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_ldexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +#define mkl_umath_@TYPE@_true_divide mkl_umath_@TYPE@_divide + +/**end repeat**/ + +/* + ***************************************************************************** + ** COMPLEX LOOPS ** + ***************************************************************************** + */ + +#define CGE(xr,xi,yr,yi) (xr > yr || (xr == yr && xi >= yi)); +#define CLE(xr,xi,yr,yi) (xr < yr || (xr == yr && xi <= yi)); +#define CGT(xr,xi,yr,yi) (xr > yr || (xr == yr && xi > yi)); +#define CLT(xr,xi,yr,yi) (xr < yr || (xr == yr && xi < yi)); +#define CEQ(xr,xi,yr,yi) (xr == yr && xi == yi); +#define CNE(xr,xi,yr,yi) (xr != yr || xi != yi); + +/**begin repeat + * complex types + * #TYPE = CFLOAT, CDOUBLE# + */ + +/**begin repeat1 + * arithmetic + * #kind = add, subtract# + */ +extern void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +extern void +mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +extern void +mkl_umath_@TYPE@_floor_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + + +/**begin repeat1 + * arithmetic + * #kind = greater, greater_equal, less, less_equal, equal, + not_equal, logical_and, logical_or, logical_xor, logical_not, + isnan, isinf, isfinite# + */ +extern void +mkl_umath_@TYPE@_@kind@(char **args, 
const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +extern void +mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +extern void +mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +extern void +mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +extern void +mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +extern void +mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +extern void +mkl_umath_@TYPE@__arg(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +extern void +mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +/**begin repeat1 + * arithmetic + * #kind = maximum, minimum, fmax, fmin# + */ +extern void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +#define mkl_umath_@TYPE@_true_divide mkl_umath_@TYPE@_divide + +/**end repeat**/ + +#undef CGE +#undef CLE +#undef CGT +#undef CLT +#undef CEQ +#undef CNE + +#endif diff --git a/mkl_umath/tests/test_basic.py b/mkl_umath/tests/test_basic.py index 14e5ded..664d4c8 100644 --- a/mkl_umath/tests/test_basic.py +++ b/mkl_umath/tests/test_basic.py @@ -41,9 +41,9 @@ def get_args(args_str): elif s == 'D': args.append(np.double(np.random.random_sample()) + np.double(np.random.random_sample()) * 1j) elif s == 'i': - args.append(np.int(np.random.randint(low=1, high=10))) + args.append(np.int_(np.random.randint(low=1, high=10))) elif s == 'l': - args.append(np.long(np.random.randint(low=1, high=10))) + args.append(np.longlong(np.random.randint(low=1, high=10))) else: raise ValueError("Unexpected type specified!") return tuple(args) @@ -86,7 +86,7 @@ def get_args(args_str): print("mkl res", mkl_res) print("npy res", np_res) - assert np.array_equal(mkl_res, np_res) + assert np.allclose(mkl_res, np_res) print("Test cases count:", len(test_cases)) print("All looks good!") diff --git a/setup.py b/setup.py index fb9b500..cd1d3b0 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -24,8 +24,24 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
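# (Editor's overview, not part of the patch itself: the rewritten setup.py
#  below (1) loads mkl_umath/generate_umath.py and writes
#  mkl_umath/src/__umath_generated.c, (2) expands mkl_umath_loops.h.src and
#  mkl_umath_loops.c.src with NumPy's conv_template, and (3) hands the
#  pre-generated C sources to skbuild.setup(), which drives the new
#  top-level CMakeLists.txt.)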
+import importlib.machinery import io +import os import re +from distutils.dep_util import newer +from numpy.distutils.conv_template import process_file as process_c_file +from os import (getcwd, environ, makedirs) +from os import (getcwd, environ, makedirs) +from os.path import join, exists, abspath, dirname +from setuptools import Extension + +import skbuild +import skbuild.setuptools_wrap +import skbuild.utils +from skbuild.command.build_py import build_py as _skbuild_build_py +from skbuild.command.install import install as _skbuild_install + +# import versioneer with io.open('mkl_umath/_version.py', 'rt', encoding='utf8') as f: version = re.search(r'__version__ = \'(.*?)\'', f.read()).group(1) @@ -51,45 +67,92 @@ Operating System :: MacOS """ -def configuration(parent_package='',top_path=None): - from numpy.distutils.misc_util import Configuration - - config = Configuration(None, parent_package, top_path) - config.set_options(ignore_setup_xxx_py=True, - assume_default_configuration=True, - delegate_options_to_subpackages=True, - quiet=True) - - config.add_subpackage('mkl_umath') - - config.version = VERSION - - return config - - -def setup_package(): - from setuptools import setup - from numpy.distutils.core import setup - metadata = dict( - name = 'mkl_umath', - maintainer = "Intel Corp.", - maintainer_email = "scripting@intel.com", - description = "MKL-based universal functions for NumPy arrays", - long_description = """Universal functions for real and complex floating point arrays powered by Intel(R) Math Kernel Library Vector (Intel(R) MKL) and Intel(R) Short Vector Math Library (Intel(R) SVML)""", - url = "http://github.com/IntelPython/mkl_umath", - author = "Intel Corporation", - download_url = "http://github.com/IntelPython/mkl_umath", - license = 'BSD', - classifiers = [_f for _f in CLASSIFIERS.split('\n') if _f], - platforms = ["Windows", "Linux", "Mac OS-X"], - test_suite = 'nose.collector', - python_requires = '>=3.6', - install_requires = ['numpy'], - configuration = configuration - ) - setup(**metadata) - - return None - -if __name__ == '__main__': - setup_package() + +def load_module(name, fn): + """ + Credit: numpy.compat.npy_load_module + """ + return importlib.machinery.SourceFileLoader(name, fn).load_module() + +def separator_join(sep, strs): + """ + Joins non-empty arguments strings with dot. 
+ + Credit: numpy.distutils.misc_util.dot_join + """ + assert isinstance(strs, (list, tuple)) + assert isinstance(sep, str) + return sep.join([si for si in strs if si]) + +pdir = join(dirname(__file__), 'mkl_umath') +wdir = join(pdir, 'src') + +generate_umath_py = join(pdir, 'generate_umath.py') +n = separator_join('_', ('mkl_umath', 'generate_umath')) +generate_umath = load_module(n, generate_umath_py) +del n + +def generate_umath_c(build_dir): + target_dir = join(build_dir, 'src') + target = join(target_dir, '__umath_generated.c') + if not exists(target_dir): + print("Folder {} was expected to exist, but creating".format(target_dir)) + makedirs(target_dir) + script = generate_umath_py + if newer(script, target): + with open(target, 'w') as f: + f.write(generate_umath.make_code(generate_umath.defdict, + generate_umath.__file__)) + return [] + +generate_umath_c(pdir) + +loops_header_templ = join(wdir, "mkl_umath_loops.h.src") +processed_loops_h_fn = join(wdir, "mkl_umath_loops.h") +loops_header_processed = process_c_file(loops_header_templ) + +with open(processed_loops_h_fn, 'w') as fid: + fid.write(loops_header_processed) + +loops_src_templ = join(wdir, "mkl_umath_loops.c.src") +processed_loops_src_fn = join(wdir, "mkl_umath_loops.c") +loops_src_processed = process_c_file(loops_src_templ) + +with open(processed_loops_src_fn, 'w') as fid: + fid.write(loops_src_processed) + +skbuild.setup( + name="mkl_umath", + version=VERSION, + ## cmdclass=_get_cmdclass(), + description = "MKL-based universal functions for NumPy arrays", + long_description = """Universal functions for real and complex floating point arrays powered by Intel(R) Math Kernel Library Vector (Intel(R) MKL) and Intel(R) Short Vector Math Library (Intel(R) SVML)""", + long_description_content_type="text/markdown", + license = 'BSD', + author="Intel Corporation", + url="http://github.com/IntelPython/mkl_umath", + packages=[ + "mkl_umath", + ], + package_data={"mkl_umath": ["tests/*.*", "tests/helper/*.py"]}, + include_package_data=True, + zip_safe=False, + setup_requires=["Cython"], + install_requires=[ + "numpy", + ], + extras_require={ + "docs": [ + "Cython", + "sphinx", + "sphinx_rtd_theme", + "pydot", + "graphviz", + "sphinxcontrib-programoutput", + ], + "coverage": ["Cython", "pytest", "pytest-cov", "coverage", "tomli"], + }, + keywords="mkl_umath", + classifiers=[_f for _f in CLASSIFIERS.split("\n") if _f], + platforms=["Linux", "Windows"] +) diff --git a/mkl_umath/setup.py b/template similarity index 100% rename from mkl_umath/setup.py rename to template From ad035905945667b762fd2b82e452b5e26ead4de7 Mon Sep 17 00:00:00 2001 From: Guzman-ballen Date: Wed, 16 Aug 2023 13:53:16 -0500 Subject: [PATCH 02/38] Add scikit-build support for Windows platform --- CMakeLists.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 87adc4c..9f0b16a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ if(WIN32) "-Wmissing-declarations " "-Wstrict-prototypes " "-Wno-unused-parameter " + "-Wno-implicit-function-declaration " ) string(CONCAT SDL_FLAGS "/GS " @@ -88,6 +89,10 @@ else() message(FATAL_ERROR "Unsupported system.") endif() +if (WIN32) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +endif() + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) # set_property(GLOBAL PROPERTY GLOBAL_DEPENDS_DEBUG_MODE 1) @@ -96,8 +101,11 @@ set(_linker_options "LINKER:${MKL_UMATH_LDFLAGS}") set(_trgt mkl_umath_loops) add_library(${_trgt} SHARED 
"mkl_umath/src/mkl_umath_loops.c") set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) -target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR} ${MKL_INCLUDE_DIR}) +target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR} "C:/Users/aguzmanb/Development/mambaforge/envs/mkl_umath_prefix/Library/include") target_link_libraries(${_trgt} PRIVATE mkl_rt) +if (WIN32) + target_link_directories(${_trgt} PRIVATE "C:/Users/aguzmanb/Development/mambaforge/envs/mkl_umath_prefix/Libs") +endif() target_link_options(${_trgt} PRIVATE ${_linker_options}) install(TARGETS ${_trgt} LIBRARY DESTINATION mkl_umath) From c62b5d7ef8fd5cfec0ec58969ef5a9feb3988c9b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 2 Sep 2023 12:58:19 -0500 Subject: [PATCH 03/38] Remove stray leftover file --- template | 185 ------------------------------------------------------- 1 file changed, 185 deletions(-) delete mode 100644 template diff --git a/template b/template deleted file mode 100644 index 81a77bf..0000000 --- a/template +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019-2021, Intel Corporation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of Intel Corporation nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import sys -from os import (getcwd, environ, makedirs) -from os.path import join, exists, abspath, dirname -import importlib.machinery # requires Python >= 3.4 -from distutils.dep_util import newer - -from numpy.distutils.ccompiler import new_compiler -from distutils.sysconfig import customize_compiler -import platform -from numpy import get_include as get_numpy_include -from distutils.sysconfig import get_python_inc as get_python_include - -def ensure_Intel_compiler(): - ccompiler = new_compiler() - customize_compiler(ccompiler) - if hasattr(ccompiler, 'compiler'): - compiler_name = ccompiler.compiler[0] - else: - compiler_name = ccompiler.__class__.__name__ - - assert ('icl' in compiler_name or 'icc' in compiler_name), \ - "Intel(R) C Compiler is required to build mkl_umath, found {}".format(compiler_name) - - -def load_module(name, fn): - """ - Credit: numpy.compat.npy_load_module - """ - return importlib.machinery.SourceFileLoader(name, fn).load_module() - - -def separator_join(sep, strs): - """ - Joins non-empty arguments strings with dot. - - Credit: numpy.distutils.misc_util.dot_join - """ - assert isinstance(strs, (list, tuple)) - assert isinstance(sep, str) - return sep.join([si for si in strs if si]) - - -def configuration(parent_package='',top_path=None): - from numpy.distutils.misc_util import Configuration - from numpy.distutils.system_info import get_info - config = Configuration('mkl_umath', parent_package, top_path) - - mkl_root = environ.get('MKLROOT', None) - if mkl_root: - mkl_info = { - 'include_dirs': [join(mkl_root, 'include')], - 'library_dirs': [join(mkl_root, 'lib'), join(mkl_root, 'lib', 'intel64')], - 'libraries': ['mkl_rt'] - } - else: - mkl_info = get_info('mkl') - - print(mkl_info) - mkl_include_dirs = mkl_info.get('include_dirs', []) - mkl_library_dirs = mkl_info.get('library_dirs', []) - mkl_libraries = mkl_info.get('libraries', ['mkl_rt']) - - pdir = dirname(__file__) - wdir = join(pdir, 'src') - mkl_info = get_info('mkl') - - generate_umath_py = join(pdir, 'generate_umath.py') - n = separator_join('_', (config.name, 'generate_umath')) - generate_umath = load_module(n, generate_umath_py) - del n - - def generate_umath_c(ext, build_dir): - target_dir = join(build_dir, 'src') - target = join(target_dir, '__umath_generated.c') - if not exists(target_dir): - print("Folder {} was expected to exist, but creating".format(target_dir)) - makedirs(target_dir) - script = generate_umath_py - if newer(script, target): - with open(target, 'w') as f: - f.write(generate_umath.make_code(generate_umath.defdict, - generate_umath.__file__)) - config.add_include_dirs(target_dir) - return [] - - sources = [generate_umath_c] - - # ensure_Intel_compiler() - - if platform.system() == "Windows": - eca = ['/fp:fast=2', '/Qimf-precision=high', '/Qprec-sqrt', '/Qstd=c99', '/Qprotect-parens'] - else: - eca = ['-fp-model', 'fast=2', '-fimf-precision=high', '-prec-sqrt', '-fprotect-parens'] - - numpy_include_dir = get_numpy_include() - python_include_dir = get_python_include() - config.add_library( - 'loops_intel', - sources = [ - join(wdir, 'loops_intel.h.src'), - join(wdir, 'loops_intel.c.src'), - ], - include_dirs = [wdir] + mkl_include_dirs + [numpy_include_dir, python_include_dir], - depends = [ - join(wdir, 'blocking_utils.h'), - join(wdir, 'fast_loop_macros.h'), - join(numpy_include_dir, 'numpy', '*object.h'), - join(python_include_dir, "Python.h") - ], - libraries=mkl_libraries, - extra_compiler_args=eca, - macros=getattr(config, 'define_macros', 
getattr(config.get_distribution(), 'define_macros', [])) - ) - - config.add_extension( - name = '_ufuncs', - sources = [ - join(wdir, 'ufuncsmodule.c'), - ] + sources, - depends = [ - join(wdir, 'loops_intel.c.src'), - join(wdir, 'loops_intel.h.src'), - ], - include_dirs = [wdir] + mkl_include_dirs, - libraries = mkl_libraries + ['loops_intel'], - library_dirs = mkl_library_dirs, - extra_compile_args = [ - '-DNDEBUG', - # '-ggdb', '-O0', '-Wall', '-Wextra', '-DDEBUG', - ] - ) - - from Cython.Build import cythonize - from setuptools import Extension - cythonize(Extension('_patch', sources=[join(wdir, 'patch.pyx'),])) - - config.add_extension( - name = '_patch', - sources = [ - join(wdir, 'patch.c'), - ], - libraries = mkl_libraries + ['loops_intel'], - library_dirs = mkl_library_dirs, - extra_compile_args = [ - '-DNDEBUG', - #'-ggdb', '-O0', '-Wall', '-Wextra', '-DDEBUG', - ] - ) - - config.add_data_dir('tests') - -# if have_cython: -# config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) - - return config - -if __name__ == '__main__': - from numpy.distutils.core import setup - setup(configuration=configuration) From 0319b439f6b7417acef6a7e64f5472032273f035 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 06:04:05 -0500 Subject: [PATCH 04/38] Add conda-recipe, a GH action workflow --- .github/workflows/conda-package.yml | 251 ++++++++++++++++++++++++++++ build.sh | 18 -- conda-recipe/bld.bat | 25 +++ conda-recipe/build.sh | 23 +++ conda-recipe/meta.yaml | 53 ++++++ conda-recipe/run_tests.bat | 1 + conda-recipe/run_tests.sh | 1 + icpx_for_conda.cfg | 1 - 8 files changed, 354 insertions(+), 19 deletions(-) create mode 100644 .github/workflows/conda-package.yml delete mode 100644 build.sh create mode 100644 conda-recipe/bld.bat create mode 100644 conda-recipe/build.sh create mode 100644 conda-recipe/meta.yaml create mode 100644 conda-recipe/run_tests.bat create mode 100644 conda-recipe/run_tests.sh delete mode 100644 icpx_for_conda.cfg diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml new file mode 100644 index 0000000..b890920 --- /dev/null +++ b/.github/workflows/conda-package.yml @@ -0,0 +1,251 @@ +name: Conda package + +on: push + +env: + PACKAGE_NAME: mkl_umath + MODULE_NAME: mkl_umath + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set pkgs_dirs + run: | + echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc + - name: Cache conda packages + uses: actions/cache@v3 + env: + CACHE_NUMBER: 0 # Increase to reset cache + with: + path: ~/.conda/pkgs + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('**/meta.yaml') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + + - name: Add conda to system path + run: echo $CONDA/bin >> $GITHUB_PATH + - name: Install conda-build + run: conda install conda-build + - name: Build conda package + run: | + CHANNELS="-c conda-forge -c intel --override-channels" + VERSIONS="--python ${{ matrix.python }}" + TEST="--no-test" + + conda build \ + $TEST \ + $VERSIONS \ + $CHANNELS \ + conda-recipe + - name: Upload artifact + uses: actions/upload-artifact@v3 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + path: /usr/share/miniconda/conda-bld/linux-64/${{ env.PACKAGE_NAME }}-*.tar.bz2 + + test: + needs: 
build + runs-on: ${{ matrix.runner }} + + strategy: + matrix: + python: ['3.10'] + experimental: [false] + runner: [ubuntu-latest] + continue-on-error: ${{ matrix.experimental }} + env: + CHANNELS: -c intel -c main --override-channels + + steps: + - name: Download artifact + uses: actions/download-artifact@v3 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + - name: Add conda to system path + run: echo $CONDA/bin >> $GITHUB_PATH + - name: Install conda-build + run: conda install conda-build + - name: Create conda channel + run: | + mkdir -p $GITHUB_WORKSPACE/channel/linux-64 + mv ${PACKAGE_NAME}-*.tar.bz2 $GITHUB_WORKSPACE/channel/linux-64 + conda index $GITHUB_WORKSPACE/channel + # Test channel + conda search $PACKAGE_NAME -c $GITHUB_WORKSPACE/channel --override-channels + + - name: Collect dependencies + run: | + CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" + conda create -n test_mkl_umath $PACKAGE_NAME python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile + - name: Display lockfile + run: cat lockfile + - name: Set pkgs_dirs + run: | + echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc + - name: Cache conda packages + uses: actions/cache@v3 + env: + CACHE_NUMBER: 0 # Increase to reset cache + with: + path: ~/.conda/pkgs + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + + - name: Install mkl_umath + run: | + CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" + conda create -n test_mkl_umath python=${{ matrix.python }} $PACKAGE_NAME pytest $CHANNELS + # Test installed packages + conda list -n test_mkl_umath + - name: Run tests + run: | + source $CONDA/etc/profile.d/conda.sh + conda activate test_mkl_umath + python -c "import mkl_umath, numpy as np; mkl_umath.use_in_numpy(); np.sin(np.linspace(0, 1, num=10**6));" + + build_windows: + runs-on: windows-latest + + strategy: + matrix: + python: ['3.10'] + env: + conda-bld: C:\Miniconda\conda-bld\win-64\ + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: conda-incubator/setup-miniconda@v2 + with: + auto-activate-base: true + conda-build-version: "*" + activate-environment: true + python-version: ${{ matrix.python }} + + - name: Cache conda packages + uses: actions/cache@v3 + env: + CACHE_NUMBER: 3 # Increase to reset cache + with: + path: /home/runner/conda_pkgs_dir + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('**/meta.yaml') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + - name: Build conda package + run: conda build --no-test --python ${{ matrix.python }} -c intel -c conda-forge --override-channels conda-recipe + - name: Upload artifact + uses: actions/upload-artifact@v3 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + path: ${{ env.conda-bld }}${{ env.PACKAGE_NAME }}-*.tar.bz2 + + test_windows: + needs: build_windows + runs-on: ${{ matrix.runner }} + defaults: + run: + shell: cmd /C CALL {0} + strategy: + matrix: + python: ['3.10'] + experimental: [false] + runner: [windows-latest] + continue-on-error: ${{ matrix.experimental }} + env: + workdir: '${{ github.workspace }}' + CHANNELS: -c intel -c conda-forge --override-channels + + steps: + - name: 
Download artifact + uses: actions/download-artifact@v3 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + - uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + conda-build-version: '*' + miniconda-version: 'latest' + activate-environment: mkl_umath_test + python-version: ${{ matrix.python }} + - name: Create conda channel with the artifact bit + shell: cmd /C CALL {0} + run: | + echo ${{ env.workdir }} + mkdir ${{ env.workdir }}\channel\win-64 + move ${{ env.PACKAGE_NAME }}-*.tar.bz2 ${{ env.workdir }}\channel\win-64 + dir ${{ env.workdir }}\channel\win-64 + - name: Index the channel + shell: cmd /C CALL {0} + run: conda index ${{ env.workdir }}\channel + + - name: Dump mkl_umath version info from created channel into ver.json + shell: cmd /C CALL {0} + run: | + conda search ${{ env.PACKAGE_NAME }} -c ${{ env.workdir }}/channel --override-channels --info --json > ${{ env.workdir }}\ver.json + - name: Output content of produced ver.json + shell: pwsh + run: Get-Content -Path ${{ env.workdir }}\ver.json + - name: Collect dependencies + shell: cmd /C CALL {0} + run: | + IF NOT EXIST ver.json ( + copy /Y ${{ env.workdir }}\ver.json . + ) + SET "SCRIPT=%VER_SCRIPT1% %VER_SCRIPT2%" + FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( + SET PACKAGE_VERSION=%%F + ) + conda install -n mkl_umath_test ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% python=${{ matrix.python }} -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} --only-deps --dry-run > lockfile + - name: Display lockfile content + shell: pwsh + run: Get-Content -Path .\lockfile + - name: Cache conda packages + uses: actions/cache@v3 + env: + CACHE_NUMBER: 0 # Increase to reset cache + with: + path: /home/runner/conda_pkgs_dir + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + - name: Install mkl_umath + shell: cmd /C CALL {0} + run: | + @ECHO ON + IF NOT EXIST ver.json ( + copy /Y ${{ env.workdir }}\ver.json . 
+ ) + set "SCRIPT=%VER_SCRIPT1% %VER_SCRIPT2%" + FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( + SET PACKAGE_VERSION=%%F + ) + SET "TEST_DEPENDENCIES=pytest pytest-cov" + conda install -n mkl_umath_test ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% %TEST_DEPENDENCIES% python=${{ matrix.python }} -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} + - name: Report content of test environment + shell: cmd /C CALL {0} + run: | + echo "Value of CONDA enviroment variable was: " %CONDA% + echo "Value of CONDA_PREFIX enviroment variable was: " %CONDA_PREFIX% + conda info && conda list -n mkl_umath_test + - name: Run tests + shell: cmd /C CALL {0} + run: >- + conda activate mkl_umath_test && python -c "import mkl_umath, numpy as np; mkl_umath.use_in_numpy(); np.sin(np.linspace(0, 1, num=10**6));" + diff --git a/build.sh b/build.sh deleted file mode 100644 index bd34337..0000000 --- a/build.sh +++ /dev/null @@ -1,18 +0,0 @@ -# This is necessary to help DPC++ find Intel libraries such as SVML, IRNG, etc in build prefix -export BUILD_PREFIX=$CONDA_PREFIX -export HOST=x86_64-conda-linux-gnu -export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${BUILD_PREFIX}/lib" - -# Intel LLVM must cooperate with compiler and sysroot from conda -echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg -export ICPXCFG="$(pwd)/icpx_for_conda.cfg" -export ICXCFG="$(pwd)/icpx_for_conda.cfg" - -# if [ -e "_skbuild" ]; then -# python setup.py clean --all -# fi - -export CMAKE_GENERATOR="Ninja" -SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" -echo "python setup.py install ${SKBUILD_ARGS}" -python setup.py install ${SKBUILD_ARGS} diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat new file mode 100644 index 0000000..e27318d --- /dev/null +++ b/conda-recipe/bld.bat @@ -0,0 +1,25 @@ +REM A workaround for activate-dpcpp.bat issue to be addressed in 2021.4 +set "LIB=%BUILD_PREFIX%\Library\lib;%BUILD_PREFIX%\compiler\lib;%LIB%" +set "INCLUDE=%BUILD_PREFIX%\include;%INCLUDE%" + +"%PYTHON%" setup.py clean --all +set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16 17.0.0 17) DO @( + REM set DIR_HINT if directory exists + IF EXIST "%BUILD_PREFIX%\Library\lib\clang\%%V\" ( + SET "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\%%V" + ) +) + +if NOT "%WHEELS_OUTPUT_FOLDER%"=="" ( + rem Install and assemble wheel package from the build bits + "%PYTHON%" setup.py install bdist_wheel %SKBUILD_ARGS% + if errorlevel 1 exit 1 + copy dist\mkl_umath*.whl %WHEELS_OUTPUT_FOLDER% + if errorlevel 1 exit 1 +) ELSE ( + rem Only install + "%PYTHON%" setup.py install %SKBUILD_ARGS% + if errorlevel 1 exit 1 +) diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh new file mode 100644 index 0000000..fc4459c --- /dev/null +++ b/conda-recipe/build.sh @@ -0,0 +1,23 @@ +# This is necessary to help DPC++ find Intel libraries such as SVML, IRNG, etc in build prefix +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${BUILD_PREFIX}/lib" + +# Intel LLVM must cooperate with compiler and sysroot from conda +echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icx_for_conda.cfg +export ICXCFG="$(pwd)/icx_for_conda.cfg" + +export CMAKE_GENERATOR="Ninja" +SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then + # Install packages and assemble wheel package from built 
bits + if [ "$CONDA_PY" == "36" ]; then + WHEELS_BUILD_ARGS="-p manylinux1_x86_64" + else + WHEELS_BUILD_ARGS="-p manylinux2014_x86_64" + fi + ${PYTHON} setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS} + cp dist/mkl_umath*.whl ${WHEELS_OUTPUT_FOLDER} +else + # Perform regular install + ${PYTHON} setup.py install ${SKBUILD_ARGS} +fi diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml new file mode 100644 index 0000000..9f98829 --- /dev/null +++ b/conda-recipe/meta.yaml @@ -0,0 +1,53 @@ +{% set version = "0.1.2" %} +{% set buildnumber = 0 %} + +package: + name: mkl_umath + version: {{ version }} + +source: + path: ../ + +build: + number: {{ buildnumber }} + ignore_run_exports: + - blas + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - {{ compiler('dpcpp') }} >=2023.2 # [not osx] + - sysroot_linux-64 >=2.28 # [linux] + host: + - setuptools + - cmake + - ninja + - git + - cython + - scikit-build + - python + - mkl-devel + - numpy-base + run: + - python + - mkl + - mkl-service + - {{ pin_compatible('intel-cmplr-lib-rt') }} + - {{ pin_compatible('numpy') }} + +test: + source_files: + - mkl_umath/tests/test_basic.py + commands: + - python mkl_umath/tests/test_basic.py + imports: + - mkl_umath + - mkl_umath._ufuncs + - mkl_umath._patch + +about: + home: http://github.com/IntelPython/mkl_umath + license: BSD-3 + license_file: LICENSE.txt + summary: Universal functions for real and complex floating point arrays powered by Intel(R) Math Kernel Library Vector (Intel(R) MKL) and Intel(R) Short Vector Math Library (Intel(R) SVML) diff --git a/conda-recipe/run_tests.bat b/conda-recipe/run_tests.bat new file mode 100644 index 0000000..590db89 --- /dev/null +++ b/conda-recipe/run_tests.bat @@ -0,0 +1 @@ +%PYTHON% tests\test_basic.py \ No newline at end of file diff --git a/conda-recipe/run_tests.sh b/conda-recipe/run_tests.sh new file mode 100644 index 0000000..7bfca5d --- /dev/null +++ b/conda-recipe/run_tests.sh @@ -0,0 +1 @@ +$PYTHON tests/test_basic.py diff --git a/icpx_for_conda.cfg b/icpx_for_conda.cfg deleted file mode 100644 index d828bd2..0000000 --- a/icpx_for_conda.cfg +++ /dev/null @@ -1 +0,0 @@ ---gcc-toolchain=/localdisk/work/aguzmanb/Development/miniconda3.py310/envs/numpy_umath_prefix.v6 --sysroot=/localdisk/work/aguzmanb/Development/miniconda3.py310/envs/numpy_umath_prefix.v6/x86_64-conda-linux-gnu/sysroot -target x86_64-conda-linux-gnu From 0350a29aee9674993111f8fd1c9410f3d7fe578b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 06:20:32 -0500 Subject: [PATCH 05/38] Updated instructions to build from source using ICX --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a9f571c..006aa88 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Where `` should be the latest version from https://anaconda.org/i Intel(R) C compiler and Intel(R) Math Kernel Library are required to build `mkl_umath` from source: ```sh -# ensure that MKL is installed, icc is activated +# ensure that MKL is installed into Python prefix, Intel LLVM compiler is activated export MKLROOT=$CONDA_PREFIX -python setup.py config_cc --compiler=intelem build_ext --inplace +CC=icx pip install --no-build-isolation --no-deps -e . 
``` From c2bbcd30c7137c635fe8a9023712aefab54c7f25 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 07:22:06 -0500 Subject: [PATCH 06/38] Try using /FORCE:UNRESOLVED for MSVC linker --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f0b16a..2314b6f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ if(WIN32) set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG" ) - set(MKL_UMATH_LDFLAGS "/NXCompat;/DynamicBase") + set(MKL_UMATH_LDFLAGS "/NXCompat;/DynamicBase;/FORCE:UNRESOLVED") elseif(UNIX) string(CONCAT WARNING_FLAGS "-Wall " From ef16268d64b669688f5a3ca17e6c5bfaf80c4af2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 08:18:29 -0500 Subject: [PATCH 07/38] Use multiple linker options --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2314b6f..7ad25d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ if(WIN32) set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG" ) - set(MKL_UMATH_LDFLAGS "/NXCompat;/DynamicBase;/FORCE:UNRESOLVED") + set(MKL_UMATH_LINKER_OPTIONS "LINKER:/NXCompat;LINKER:/DynamicBase;LINKER:/FORCE:UNRESOLVED") elseif(UNIX) string(CONCAT WARNING_FLAGS "-Wall " @@ -84,7 +84,7 @@ elseif(UNIX) "${CMAKE_C_FLAGS_DEBUG} ${CFLAGS} -O0 -g1 -DDEBUG" ) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-incompatible-function-pointer-types ${CFLAGS}") - set(MKL_UMATH_LDFLAGS "-z,noexecstack,-z,relro,-z,now") + set(MKL_UMATH_LINKER_OPTIONS "LINKER:-z,noexecstack,-z,relro,-z,now") else() message(FATAL_ERROR "Unsupported system.") endif() @@ -96,7 +96,7 @@ endif() set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) # set_property(GLOBAL PROPERTY GLOBAL_DEPENDS_DEBUG_MODE 1) -set(_linker_options "LINKER:${MKL_UMATH_LDFLAGS}") +set(_linker_options ${MKL_UMATH_LINKER_OPTIONS}) set(_trgt mkl_umath_loops) add_library(${_trgt} SHARED "mkl_umath/src/mkl_umath_loops.c") From fd833e5c563a2325ab138af4c6b4df30549fab63 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 08:29:01 -0500 Subject: [PATCH 08/38] Updated copyright year to 2023 --- mkl_umath/__init__.py | 2 +- mkl_umath/generate_umath.py | 2 +- mkl_umath/src/mkl_umath_loops.c.src | 2 +- mkl_umath/src/mkl_umath_loops.h.src | 2 +- mkl_umath/tests/test_basic.py | 2 +- mkl_umath/ufunc_docstrings.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mkl_umath/__init__.py b/mkl_umath/__init__.py index 92960ad..a6e2927 100644 --- a/mkl_umath/__init__.py +++ b/mkl_umath/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: diff --git a/mkl_umath/generate_umath.py b/mkl_umath/generate_umath.py index cc2034f..e6609ab 100644 --- a/mkl_umath/generate_umath.py +++ b/mkl_umath/generate_umath.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index b5cbbaf..be3e8d3 100644 --- 
a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, Intel Corporation + * Copyright (c) 2019-2023, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/mkl_umath/src/mkl_umath_loops.h.src b/mkl_umath/src/mkl_umath_loops.h.src index 70a7e94..7dccf0a 100644 --- a/mkl_umath/src/mkl_umath_loops.h.src +++ b/mkl_umath/src/mkl_umath_loops.h.src @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, Intel Corporation + * Copyright (c) 2019-2023, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/mkl_umath/tests/test_basic.py b/mkl_umath/tests/test_basic.py index 664d4c8..1a9fc53 100644 --- a/mkl_umath/tests/test_basic.py +++ b/mkl_umath/tests/test_basic.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: diff --git a/mkl_umath/ufunc_docstrings.py b/mkl_umath/ufunc_docstrings.py index 5abc3af..79877e2 100644 --- a/mkl_umath/ufunc_docstrings.py +++ b/mkl_umath/ufunc_docstrings.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: From 589046defc55af641d1259f388ab5db7509c616f Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 08:59:32 -0500 Subject: [PATCH 09/38] Link mkl_umath_loops to Python lib --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ad25d2..e375544 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ if(WIN32) set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG" ) - set(MKL_UMATH_LINKER_OPTIONS "LINKER:/NXCompat;LINKER:/DynamicBase;LINKER:/FORCE:UNRESOLVED") + set(MKL_UMATH_LINKER_OPTIONS "LINKER:/NXCompat;LINKER:/DynamicBase") elseif(UNIX) string(CONCAT WARNING_FLAGS "-Wall " @@ -102,7 +102,7 @@ set(_trgt mkl_umath_loops) add_library(${_trgt} SHARED "mkl_umath/src/mkl_umath_loops.c") set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR} "C:/Users/aguzmanb/Development/mambaforge/envs/mkl_umath_prefix/Library/include") -target_link_libraries(${_trgt} PRIVATE mkl_rt) +target_link_libraries(${_trgt} PRIVATE mkl_rt ${Python_LIBRARIES}) if (WIN32) target_link_directories(${_trgt} PRIVATE "C:/Users/aguzmanb/Development/mambaforge/envs/mkl_umath_prefix/Libs") endif() From e80e7108d4d424ec1e13f092ac68b2a482f257b7 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 09:29:43 -0500 Subject: [PATCH 10/38] No need to export all symbols --- CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e375544..f2cb2e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,10 +89,6 @@ else() message(FATAL_ERROR "Unsupported system.") endif() -if (WIN32) - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) -endif() - 
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) # set_property(GLOBAL PROPERTY GLOBAL_DEPENDS_DEBUG_MODE 1) From effe815b51db631d531731a1652386543487cfdb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 10:07:23 -0500 Subject: [PATCH 11/38] Removed stray hard path --- CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f2cb2e0..7c2813a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,9 +99,6 @@ add_library(${_trgt} SHARED "mkl_umath/src/mkl_umath_loops.c") set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR} "C:/Users/aguzmanb/Development/mambaforge/envs/mkl_umath_prefix/Library/include") target_link_libraries(${_trgt} PRIVATE mkl_rt ${Python_LIBRARIES}) -if (WIN32) - target_link_directories(${_trgt} PRIVATE "C:/Users/aguzmanb/Development/mambaforge/envs/mkl_umath_prefix/Libs") -endif() target_link_options(${_trgt} PRIVATE ${_linker_options}) install(TARGETS ${_trgt} LIBRARY DESTINATION mkl_umath) From bb87659942e8acf77c263f91503a9f9c5be42993 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 10:07:30 -0500 Subject: [PATCH 12/38] Ensure symbols are property annotated for export --- mkl_umath/src/mkl_umath_loops.h.src | 205 +++++++++++++++++++--------- 1 file changed, 140 insertions(+), 65 deletions(-) diff --git a/mkl_umath/src/mkl_umath_loops.h.src b/mkl_umath/src/mkl_umath_loops.h.src index 7dccf0a..c643c20 100644 --- a/mkl_umath/src/mkl_umath_loops.h.src +++ b/mkl_umath/src/mkl_umath_loops.h.src @@ -32,100 +32,139 @@ #include +#ifdef _WIN32 +#ifdef mkl_umath_loops_EXPORTS +#define MKL_UMATH_API __declspec(dllexport) +#else +#define MKL_UMATH_API __declspec(dllimport) +#endif +#else +#define MKL_UMATH_API +#endif + /**begin repeat * Float types * #TYPE = FLOAT, DOUBLE# */ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_sqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_invsqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_expm1(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_erf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_log10(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_log1p(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_cos(char **args, const npy_intp *dimensions, const npy_intp *steps, void 
*NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_sin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_tan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_arccos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_arcsin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_arctan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_cosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_sinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_tanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_arccosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_arcsinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_arctanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_floor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_ceil(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_rint(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_trunc(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_cbrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**begin repeat1 * Arithmetic * # kind = add, subtract, multiply, divide# */ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**end repeat1**/ @@ -134,83 +173,106 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp * # kind = equal, not_equal, less, less_equal, greater, greater_equal, * logical_and, logical_or# */ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**end repeat1**/ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_logical_xor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_logical_not(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**begin repeat1 * #kind = isnan, isinf, isfinite, signbit# **/ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_@kind@(char **args, 
const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**end repeat1**/ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_copysign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**begin repeat1 * #kind = maximum, minimum, fmax, fmin# **/ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**end repeat1**/ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_floor_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_divmod(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_negative(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_positive(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_frexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_ldexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); #define mkl_umath_@TYPE@_true_divide mkl_umath_@TYPE@_divide @@ -239,17 +301,21 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i * arithmetic * #kind = add, subtract# */ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**end repeat1**/ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void 
mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_floor_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); @@ -259,36 +325,45 @@ mkl_umath_@TYPE@_floor_divide(char **args, const npy_intp *dimensions, const npy not_equal, logical_and, logical_or, logical_xor, logical_not, isnan, isinf, isfinite# */ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**end repeat1**/ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@__arg(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); /**begin repeat1 * arithmetic * #kind = maximum, minimum, fmax, fmin# */ -extern void +MKL_UMATH_API +void mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); /**end repeat1**/ From e3db203b4718081a3ebf9324d65f30e46fed5eb1 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Sep 2023 12:53:15 -0500 Subject: [PATCH 13/38] Specify ARCHIVE/RUNTIME/LIBRARY destinations for mkl_umath on Windows --- CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c2813a..edef098 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,7 +100,11 @@ set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR} "C:/Users/aguzmanb/Development/mambaforge/envs/mkl_umath_prefix/Library/include") target_link_libraries(${_trgt} PRIVATE mkl_rt ${Python_LIBRARIES}) target_link_options(${_trgt} PRIVATE ${_linker_options}) -install(TARGETS ${_trgt} LIBRARY DESTINATION mkl_umath) +install(TARGETS ${_trgt} + LIBRARY DESTINATION mkl_umath + ARCHIVE DESTINATION mkl_umath + RUNTIME DESTINATION mkl_umath +) add_library(_ufuncs MODULE "mkl_umath/src/ufuncsmodule.c" "mkl_umath/src/__umath_generated.c") target_include_directories(_ufuncs PRIVATE "mkl_umath/src" ${NumPy_INCLUDE_DIR} ${MKL_INCLUDE_DIR}) From fe29ae613e1c9bc9078634b85dd2178e402b163a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 11 Sep 2023 04:37:45 -0500 Subject: [PATCH 14/38] Use vendored copy of conv_template script --- _vendored/README.md | 5 + _vendored/__init__.py | 1 + _vendored/conv_template.py | 329 +++++++++++++++++++++++++++++++++++++ setup.py | 2 +- 4 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 _vendored/README.md create mode 100644 
_vendored/__init__.py create mode 100644 _vendored/conv_template.py diff --git a/_vendored/README.md b/_vendored/README.md new file mode 100644 index 0000000..0ebafcb --- /dev/null +++ b/_vendored/README.md @@ -0,0 +1,5 @@ +## Vendored files + +File `conv_template.py` is copied from NumPy's numpy/distutils folder, since +`numpy.distutils` is absent from the installation layout starting with +Python 3.12 \ No newline at end of file diff --git a/_vendored/__init__.py b/_vendored/__init__.py new file mode 100644 index 0000000..fa81ada --- /dev/null +++ b/_vendored/__init__.py @@ -0,0 +1 @@ +# empty file diff --git a/_vendored/conv_template.py b/_vendored/conv_template.py new file mode 100644 index 0000000..c8933d1 --- /dev/null +++ b/_vendored/conv_template.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python3 +""" +takes templated file .xxx.src and produces .xxx file where .xxx is +.i or .c or .h, using the following template rules + +/**begin repeat -- on a line by itself marks the start of a repeated code + segment +/**end repeat**/ -- on a line by itself marks it's end + +After the /**begin repeat and before the */, all the named templates are placed +these should all have the same number of replacements + +Repeat blocks can be nested, with each nested block labeled with its depth, +i.e. +/**begin repeat1 + *.... + */ +/**end repeat1**/ + +When using nested loops, you can optionally exclude particular +combinations of the variables using (inside the comment portion of the inner loop): + + :exclude: var1=value1, var2=value2, ... + +This will exclude the pattern where var1 is value1 and var2 is value2 when +the result is being generated. + + +In the main body each replace will use one entry from the list of named replacements + + Note that all #..# forms in a block must have the same number of + comma-separated entries. + +Example: + + An input file containing + + /**begin repeat + * #a = 1,2,3# + * #b = 1,2,3# + */ + + /**begin repeat1 + * #c = ted, jim# + */ + @a@, @b@, @c@ + /**end repeat1**/ + + /**end repeat**/ + + produces + + line 1 "template.c.src" + + /* + ********************************************************************* + ** This file was autogenerated from a template DO NOT EDIT!!** + ** Changes should be made to the original source (.src) file ** + ********************************************************************* + */ + + #line 9 + 1, 1, ted + + #line 9 + 1, 1, jim + + #line 9 + 2, 2, ted + + #line 9 + 2, 2, jim + + #line 9 + 3, 3, ted + + #line 9 + 3, 3, jim + +""" + +__all__ = ['process_str', 'process_file'] + +import os +import sys +import re + +# names for replacement that are already global. +global_names = {} + +# header placed at the front of head processed file +header =\ +""" +/* + ***************************************************************************** + ** This file was autogenerated from a template DO NOT EDIT!!!! ** + ** Changes should be made to the original source (.src) file ** + ***************************************************************************** + */ + +""" +# Parse string for repeat loops +def parse_structure(astr, level): + """ + The returned line number is from the beginning of the string, starting + at zero. Returns an empty list if no loops found. 
+ + """ + if level == 0 : + loopbeg = "/**begin repeat" + loopend = "/**end repeat**/" + else : + loopbeg = "/**begin repeat%d" % level + loopend = "/**end repeat%d**/" % level + + ind = 0 + line = 0 + spanlist = [] + while True: + start = astr.find(loopbeg, ind) + if start == -1: + break + start2 = astr.find("*/", start) + start2 = astr.find("\n", start2) + fini1 = astr.find(loopend, start2) + fini2 = astr.find("\n", fini1) + line += astr.count("\n", ind, start2+1) + spanlist.append((start, start2+1, fini1, fini2+1, line)) + line += astr.count("\n", start2+1, fini2) + ind = fini2 + spanlist.sort() + return spanlist + + +def paren_repl(obj): + torep = obj.group(1) + numrep = obj.group(2) + return ','.join([torep]*int(numrep)) + +parenrep = re.compile(r"\(([^)]*)\)\*(\d+)") +plainrep = re.compile(r"([^*]+)\*(\d+)") +def parse_values(astr): + # replaces all occurrences of '(a,b,c)*4' in astr + # with 'a,b,c,a,b,c,a,b,c,a,b,c'. Empty braces generate + # empty values, i.e., ()*4 yields ',,,'. The result is + # split at ',' and a list of values returned. + astr = parenrep.sub(paren_repl, astr) + # replaces occurrences of xxx*3 with xxx, xxx, xxx + astr = ','.join([plainrep.sub(paren_repl, x.strip()) + for x in astr.split(',')]) + return astr.split(',') + + +stripast = re.compile(r"\n\s*\*?") +named_re = re.compile(r"#\s*(\w*)\s*=([^#]*)#") +exclude_vars_re = re.compile(r"(\w*)=(\w*)") +exclude_re = re.compile(":exclude:") +def parse_loop_header(loophead) : + """Find all named replacements in the header + + Returns a list of dictionaries, one for each loop iteration, + where each key is a name to be substituted and the corresponding + value is the replacement string. + + Also return a list of exclusions. The exclusions are dictionaries + of key value pairs. There can be more than one exclusion. + [{'var1':'value1', 'var2', 'value2'[,...]}, ...] + + """ + # Strip out '\n' and leading '*', if any, in continuation lines. + # This should not effect code previous to this change as + # continuation lines were not allowed. 
+ loophead = stripast.sub("", loophead) + # parse out the names and lists of values + names = [] + reps = named_re.findall(loophead) + nsub = None + for rep in reps: + name = rep[0] + vals = parse_values(rep[1]) + size = len(vals) + if nsub is None : + nsub = size + elif nsub != size : + msg = "Mismatch in number of values, %d != %d\n%s = %s" + raise ValueError(msg % (nsub, size, name, vals)) + names.append((name, vals)) + + + # Find any exclude variables + excludes = [] + + for obj in exclude_re.finditer(loophead): + span = obj.span() + # find next newline + endline = loophead.find('\n', span[1]) + substr = loophead[span[1]:endline] + ex_names = exclude_vars_re.findall(substr) + excludes.append(dict(ex_names)) + + # generate list of dictionaries, one for each template iteration + dlist = [] + if nsub is None : + raise ValueError("No substitution variables found") + for i in range(nsub): + tmp = {name: vals[i] for name, vals in names} + dlist.append(tmp) + return dlist + +replace_re = re.compile(r"@(\w+)@") +def parse_string(astr, env, level, line) : + lineno = "#line %d\n" % line + + # local function for string replacement, uses env + def replace(match): + name = match.group(1) + try : + val = env[name] + except KeyError: + msg = 'line %d: no definition of key "%s"'%(line, name) + raise ValueError(msg) from None + return val + + code = [lineno] + struct = parse_structure(astr, level) + if struct : + # recurse over inner loops + oldend = 0 + newlevel = level + 1 + for sub in struct: + pref = astr[oldend:sub[0]] + head = astr[sub[0]:sub[1]] + text = astr[sub[1]:sub[2]] + oldend = sub[3] + newline = line + sub[4] + code.append(replace_re.sub(replace, pref)) + try : + envlist = parse_loop_header(head) + except ValueError as e: + msg = "line %d: %s" % (newline, e) + raise ValueError(msg) + for newenv in envlist : + newenv.update(env) + newcode = parse_string(text, newenv, newlevel, newline) + code.extend(newcode) + suff = astr[oldend:] + code.append(replace_re.sub(replace, suff)) + else : + # replace keys + code.append(replace_re.sub(replace, astr)) + code.append('\n') + return ''.join(code) + +def process_str(astr): + code = [header] + code.extend(parse_string(astr, global_names, 0, 1)) + return ''.join(code) + + +include_src_re = re.compile(r"(\n|\A)#include\s*['\"]" + r"(?P[\w\d./\\]+[.]src)['\"]", re.I) + +def resolve_includes(source): + d = os.path.dirname(source) + with open(source) as fid: + lines = [] + for line in fid: + m = include_src_re.match(line) + if m: + fn = m.group('name') + if not os.path.isabs(fn): + fn = os.path.join(d, fn) + if os.path.isfile(fn): + lines.extend(resolve_includes(fn)) + else: + lines.append(line) + else: + lines.append(line) + return lines + +def process_file(source): + lines = resolve_includes(source) + sourcefile = os.path.normcase(source).replace("\\", "\\\\") + try: + code = process_str(''.join(lines)) + except ValueError as e: + raise ValueError('In "%s" loop at %s' % (sourcefile, e)) from None + return '#line 1 "%s"\n%s' % (sourcefile, code) + + +def unique_key(adict): + # this obtains a unique key given a dictionary + # currently it works by appending together n of the letters of the + # current keys and increasing n until a unique key is found + # -- not particularly quick + allkeys = list(adict.keys()) + done = False + n = 1 + while not done: + newkey = "".join([x[:n] for x in allkeys]) + if newkey in allkeys: + n += 1 + else: + done = True + return newkey + + +def main(): + try: + file = sys.argv[1] + except IndexError: + fid = sys.stdin + 
outfile = sys.stdout + else: + fid = open(file, 'r') + (base, ext) = os.path.splitext(file) + newname = base + outfile = open(newname, 'w') + + allstr = fid.read() + try: + writestr = process_str(allstr) + except ValueError as e: + raise ValueError("In %s loop at %s" % (file, e)) from None + + outfile.write(writestr) + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index f3cb490..6dd3ae5 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ import os import re from distutils.dep_util import newer -from numpy.distutils.conv_template import process_file as process_c_file +from _vendored.conv_template import process_file as process_c_file from os import (getcwd, environ, makedirs) from os import (getcwd, environ, makedirs) from os.path import join, exists, abspath, dirname From 2c87ff707ee2df0aafa6fedc7d26a6ada91daeda Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Jan 2024 05:25:29 -0600 Subject: [PATCH 15/38] Removed duplicate import line --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 6dd3ae5..0ee7fa6 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,6 @@ from distutils.dep_util import newer from _vendored.conv_template import process_file as process_c_file from os import (getcwd, environ, makedirs) -from os import (getcwd, environ, makedirs) from os.path import join, exists, abspath, dirname from setuptools import Extension From de82deb4be79932c710bd96f60a7d942937105b0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Jan 2024 05:25:44 -0600 Subject: [PATCH 16/38] Removed hard-coded paths, updated to CMake 3.27 Used Python_add_library, instead of removed add_library followed by python_extension_module function from scikit-build. Removed superfluous comments --- CMakeLists.txt | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index edef098..c778fce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,6 @@ -cmake_minimum_required(VERSION 3.21...3.25 FATAL_ERROR) +cmake_minimum_required(VERSION 3.27...3.28 FATAL_ERROR) -if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.24") - cmake_policy(SET CMP0135 NEW) -endif() +cmake_policy(SET CMP0135 NEW) project(mkl_umath LANGUAGES C @@ -11,7 +9,6 @@ project(mkl_umath find_package(Python COMPONENTS Interpreter Development REQUIRED) find_package(NumPy REQUIRED) -find_package(PythonExtensions REQUIRED) set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"") find_package(Cython REQUIRED) @@ -20,14 +17,6 @@ set(MKL_ARCH intel64) set(MKL_LINK sdl) set(MKL_THREADING intel_thread) set(MKL_INTERFACE ilp64) -# MKL_ARCH: None, set to ` intel64` by default -# MKL_ROOT /localdisk/work/aguzmanb/Development/miniconda3.py310/envs/numpy_umath_prefix.v5 -# MKL_DPCPP_LINK: None, set to ` dynamic` by default -# MKL_LINK: None, set to ` dynamic` by default -# MKL_DPCPP_INTERFACE_FULL: None, set to ` intel_ilp64` by default -# MKL_INTERFACE_FULL: None, set to ` intel_ilp64` by default -# MKL_DPCPP_THREADING: None, set to ` tbb_thread` by default -# MKL_THREADING: None, set to ` intel_thread` by default find_package(MKL REQUIRED) if(WIN32) @@ -97,32 +86,32 @@ set(_linker_options ${MKL_UMATH_LINKER_OPTIONS}) set(_trgt mkl_umath_loops) add_library(${_trgt} SHARED "mkl_umath/src/mkl_umath_loops.c") set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) -target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR} 
"C:/Users/aguzmanb/Development/mambaforge/envs/mkl_umath_prefix/Library/include") +target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) target_link_libraries(${_trgt} PRIVATE mkl_rt ${Python_LIBRARIES}) target_link_options(${_trgt} PRIVATE ${_linker_options}) +target_compile_options(${_trgt} PRIVATE -fveclib=SVML) +target_compile_options(${_trgt} PRIVATE -fvectorize) install(TARGETS ${_trgt} LIBRARY DESTINATION mkl_umath ARCHIVE DESTINATION mkl_umath RUNTIME DESTINATION mkl_umath ) -add_library(_ufuncs MODULE "mkl_umath/src/ufuncsmodule.c" "mkl_umath/src/__umath_generated.c") +Python_add_library(_ufuncs MODULE WITH_SOABI "mkl_umath/src/ufuncsmodule.c" "mkl_umath/src/__umath_generated.c") target_include_directories(_ufuncs PRIVATE "mkl_umath/src" ${NumPy_INCLUDE_DIR} ${MKL_INCLUDE_DIR}) target_compile_definitions(_ufuncs PUBLIC NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) target_link_options(_ufuncs PRIVATE ${_linker_options}) -target_link_libraries(_ufuncs mkl_umath_loops) -python_extension_module(_ufuncs) +target_link_libraries(_ufuncs PRIVATE mkl_umath_loops) if (UNIX) set_target_properties(_ufuncs PROPERTIES INSTALL_RPATH "$ORIGIN") endif() install(TARGETS _ufuncs LIBRARY DESTINATION mkl_umath) add_cython_target(_patch "mkl_umath/src/_patch.pyx" C OUTPUT_VAR _generated_src) -add_library(_patch MODULE ${_generated_src}) +Python_add_library(_patch MODULE WITH_SOABI ${_generated_src}) target_include_directories(_patch PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) target_compile_definitions(_patch PUBLIC NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) -target_link_libraries(_patch mkl_umath_loops) -python_extension_module(_patch) +target_link_libraries(_patch PRIVATE mkl_umath_loops) if (UNIX) set_target_properties(_patch PROPERTIES INSTALL_RPATH "$ORIGIN") endif() From ea90d0f6387a91064db6f20715141752fafac303 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 10 Jan 2024 12:47:20 -0600 Subject: [PATCH 17/38] Changes to permit vectorization of most loops by ICX Some loops are not vectorized due to compiler's cost model analysis. Added CMake option OPTIMIZATION_REPORT (OFF by default). It would instruct compiler to generate optimization report for mkl_umath library. 
--- CMakeLists.txt | 8 + mkl_umath/src/fast_loop_macros.h | 22 +- mkl_umath/src/mkl_umath_loops.c.src | 579 +++++++++++++++++----------- 3 files changed, 378 insertions(+), 231 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c778fce..6fc047f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,11 @@ project(mkl_umath DESCRIPTION "mkl_umath module" ) +option(OPTIMIZATION_REPORT + "Whether to generate optimization vectorization report" + OFF +) + find_package(Python COMPONENTS Interpreter Development REQUIRED) find_package(NumPy REQUIRED) @@ -91,6 +96,9 @@ target_link_libraries(${_trgt} PRIVATE mkl_rt ${Python_LIBRARIES}) target_link_options(${_trgt} PRIVATE ${_linker_options}) target_compile_options(${_trgt} PRIVATE -fveclib=SVML) target_compile_options(${_trgt} PRIVATE -fvectorize) +if(OPTIMIZATION_REPORT) + target_compile_options(${_trgt} PRIVATE -qopt-report=3) +endif() install(TARGETS ${_trgt} LIBRARY DESTINATION mkl_umath ARCHIVE DESTINATION mkl_umath diff --git a/mkl_umath/src/fast_loop_macros.h b/mkl_umath/src/fast_loop_macros.h index d26174c..12ef2e1 100644 --- a/mkl_umath/src/fast_loop_macros.h +++ b/mkl_umath/src/fast_loop_macros.h @@ -74,19 +74,19 @@ npy_intp is1 = steps[0], os1 = steps[1];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, op1 += os1) + for(i = 0; i < n; ++i, ip1 += is1, op1 += os1) -#define UNARY_LOOP_VECTORIZED\ - char *ip1 = args[0], *op1 = args[1];\ - npy_intp is1 = steps[0], os1 = steps[1];\ +#define UNARY_LOOP_VECTORIZED(tin, tout)\ + tin *ip1 = (tin *) args[0];\ + tout *op1 = (tout *) args[1]; \ npy_intp n = dimensions[0];\ npy_intp i;\ NPY_PRAGMA_VECTOR\ - for(i = 0; i < n; i++, ip1 += is1, op1 += os1) + for(i = 0; i < n; ++i, ++ip1, ++op1) -#define UNARY_LOOP_DISPATCH(cond, body)\ +#define UNARY_LOOP_DISPATCH(tin, tout, cond, body)\ if (cond) {\ - UNARY_LOOP_VECTORIZED { body; }\ + UNARY_LOOP_VECTORIZED(tin, tout) { body; }\ } else {\ UNARY_LOOP { body; }\ } @@ -97,7 +97,7 @@ npy_intp is1 = steps[0], os1 = steps[1], os2 = steps[2];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, op1 += os1, op2 += os2) + for(i = 0; i < n; ++i, ip1 += is1, op1 += os1, op2 += os2) /** (ip1, ip2) -> (op1) */ #define BINARY_LOOP\ @@ -105,7 +105,7 @@ npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1) + for(i = 0; i < n; ++i, ip1 += is1, ip2 += is2, op1 += os1) /** (ip1, ip2) -> (op1, op2) */ #define BINARY_LOOP_TWO_OUT\ @@ -113,7 +113,7 @@ npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2], os2 = steps[3];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1, op2 += os2) + for(i = 0; i < n; ++i, ip1 += is1, ip2 += is2, op1 += os1, op2 += os2) /** (ip1, ip2, ip3) -> (op1) */ #define TERNARY_LOOP\ @@ -121,7 +121,7 @@ npy_intp is1 = steps[0], is2 = steps[1], is3 = steps[2], os1 = steps[3];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1) + for(i = 0; i < n; ++i, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1) /** @} */ diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index be3e8d3..50fc7ea 100644 --- a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -41,7 +41,7 @@ #include "blocking_utils.h" #include "mkl_umath_loops.h" -/* Adapated from NumPy's source code. +/* Adapated from NumPy's source code. 
* https://github.com/numpy/numpy/blob/main/LICENSE.txt */ /* @@ -142,13 +142,13 @@ static inline npy_double spacing(npy_double x) { if (isinf(x)) - return ((npy_double) NAN); + return ((npy_double) NAN); return copysign(nextafter(fabs(x), ((npy_double) INFINITY)), x) - x; } static inline npy_float spacingf(npy_float x) { if (isinff(x)) - return ((npy_float) NAN); + return ((npy_float) NAN); return copysignf(nextafterf(fabsf(x), INFINITY), x) - x; } @@ -225,18 +225,23 @@ divmod@c@(@type@ a, @type@ b, @type@ *modulus) void mkl_umath_@TYPE@_sqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@))) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Sqrt, dimensions[0], @type@, args[0], args[1]); /* v@c@Sqrt(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -253,18 +258,23 @@ mkl_umath_@TYPE@_sqrt(char **args, const npy_intp *dimensions, const npy_intp *s void mkl_umath_@TYPE@_invsqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@InvSqrt, dimensions[0], @type@, args[0], args[1]); /* v@c@InvSqrt(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -282,24 +292,26 @@ mkl_umath_@TYPE@_invsqrt(char **args, const npy_intp *dimensions, const npy_intp void mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; int ignore_fpstatus = 0; - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@))) { + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { ignore_fpstatus = 1; CHUNKED_VML_CALL2(v@c@Exp, dimensions[0], @type@, args[0], args[1]); /* v@c@Exp(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; - if(in1 == -NPY_INFINITY@A@){ - ignore_fpstatus = 1; - } + ignore_fpstatus |= ((in1 == 
-NPY_INFINITY@A@) ? 1 : 0); *(@type@ *)op1 = @scalarf@(in1); - ) + ) } if(ignore_fpstatus) { feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INVALID); @@ -320,8 +332,14 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st void mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); @@ -341,18 +359,22 @@ mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *s void mkl_umath_@TYPE@_expm1(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { CHUNKED_VML_CALL2(v@c@Expm1, dimensions[0], @type@, args[0], args[1]); /* v@c@Expm1(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -369,18 +391,23 @@ mkl_umath_@TYPE@_expm1(char **args, const npy_intp *dimensions, const npy_intp * void mkl_umath_@TYPE@_erf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Erf, dimensions[0], @type@, args[0], args[1]); /* v@c@Erf(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -397,18 +424,23 @@ mkl_umath_@TYPE@_erf(char **args, const npy_intp *dimensions, const npy_intp *st void mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Ln, dimensions[0], @type@, args[0], args[1]); /* 
v@c@Ln(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -426,8 +458,14 @@ mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *st void mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); @@ -447,18 +485,23 @@ mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *s void mkl_umath_@TYPE@_log10(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Log10, dimensions[0], @type@, args[0], args[1]); /* v@c@Log10(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -475,18 +518,23 @@ mkl_umath_@TYPE@_log10(char **args, const npy_intp *dimensions, const npy_intp * void mkl_umath_@TYPE@_log1p(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Log1p, dimensions[0], @type@, args[0], args[1]); /* v@c@Log1p(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -503,18 +551,23 @@ mkl_umath_@TYPE@_log1p(char **args, const npy_intp *dimensions, const npy_intp * void mkl_umath_@TYPE@_cos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( 
can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Cos, dimensions[0], @type@, args[0], args[1]); /* v@c@Cos(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -531,18 +584,23 @@ mkl_umath_@TYPE@_cos(char **args, const npy_intp *dimensions, const npy_intp *st void mkl_umath_@TYPE@_sin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Sin, dimensions[0], @type@, args[0], args[1]); /* v@c@Sin(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -559,18 +617,23 @@ mkl_umath_@TYPE@_sin(char **args, const npy_intp *dimensions, const npy_intp *st void mkl_umath_@TYPE@_tan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Tan, dimensions[0], @type@, args[0], args[1]); /* v@c@Tan(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -587,18 +650,23 @@ mkl_umath_@TYPE@_tan(char **args, const npy_intp *dimensions, const npy_intp *st void mkl_umath_@TYPE@_arccos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Acos, dimensions[0], @type@, args[0], args[1]); /* v@c@Acos(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -615,18 +683,23 @@ mkl_umath_@TYPE@_arccos(char **args, const 
npy_intp *dimensions, const npy_intp void mkl_umath_@TYPE@_arcsin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Asin, dimensions[0], @type@, args[0], args[1]); /* v@c@Asin(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -643,18 +716,23 @@ mkl_umath_@TYPE@_arcsin(char **args, const npy_intp *dimensions, const npy_intp void mkl_umath_@TYPE@_arctan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Atan, dimensions[0], @type@, args[0], args[1]); /* v@c@Atan(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -671,18 +749,23 @@ mkl_umath_@TYPE@_arctan(char **args, const npy_intp *dimensions, const npy_intp void mkl_umath_@TYPE@_cosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Cosh, dimensions[0], @type@, args[0], args[1]); /* v@c@Cosh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -699,18 +782,23 @@ mkl_umath_@TYPE@_cosh(char **args, const npy_intp *dimensions, const npy_intp *s void mkl_umath_@TYPE@_sinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize 
= contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Sinh, dimensions[0], @type@, args[0], args[1]); /* v@c@Sinh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -727,18 +815,23 @@ mkl_umath_@TYPE@_sinh(char **args, const npy_intp *dimensions, const npy_intp *s void mkl_umath_@TYPE@_tanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Tanh, dimensions[0], @type@, args[0], args[1]); /* v@c@Tanh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -755,18 +848,23 @@ mkl_umath_@TYPE@_tanh(char **args, const npy_intp *dimensions, const npy_intp *s void mkl_umath_@TYPE@_arccosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Acosh, dimensions[0], @type@, args[0], args[1]); /* v@c@Acosh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -783,18 +881,23 @@ mkl_umath_@TYPE@_arccosh(char **args, const npy_intp *dimensions, const npy_intp void mkl_umath_@TYPE@_arcsinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Asinh, dimensions[0], @type@, args[0], args[1]); /* v@c@Asinh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -811,18 
+914,23 @@ mkl_umath_@TYPE@_arcsinh(char **args, const npy_intp *dimensions, const npy_intp void mkl_umath_@TYPE@_arctanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Atanh, dimensions[0], @type@, args[0], args[1]); /* v@c@Atanh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -839,8 +947,14 @@ mkl_umath_@TYPE@_arctanh(char **args, const npy_intp *dimensions, const npy_intp void mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); @@ -860,18 +974,23 @@ mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *s void mkl_umath_@TYPE@_floor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(steps[0] == sizeof(@type@) && steps[1] == sizeof(@type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Floor, dimensions[0], @type@, args[0], args[1]); /* v@c@Floor(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -888,18 +1007,23 @@ mkl_umath_@TYPE@_floor(char **args, const npy_intp *dimensions, const npy_intp * void mkl_umath_@TYPE@_ceil(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Ceil, dimensions[0], @type@, args[0], args[1]); /* v@c@Ceil(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], 
dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -916,18 +1040,23 @@ mkl_umath_@TYPE@_ceil(char **args, const npy_intp *dimensions, const npy_intp *s void mkl_umath_@TYPE@_rint(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(steps[0] == sizeof(@type@) && steps[1] == sizeof(@type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Rint, dimensions[0], @type@, args[0], args[1]); /* v@c@Rint(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -944,18 +1073,23 @@ mkl_umath_@TYPE@_rint(char **args, const npy_intp *dimensions, const npy_intp *s void mkl_umath_@TYPE@_trunc(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Trunc, dimensions[0], @type@, args[0], args[1]); /* v@c@Trunc(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -972,18 +1106,23 @@ mkl_umath_@TYPE@_trunc(char **args, const npy_intp *dimensions, const npy_intp * void mkl_umath_@TYPE@_cbrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Cbrt, dimensions[0], @type@, args[0], args[1]); /* v@c@Cbrt(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -1126,19 +1265,19 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp @type@ *op1_shifted = op1 + peel; @type@ *ip2_shifted = ip2 + peel; - if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && - DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { - 
NPY_ASSUME_ALIGNED(ip1_aligned, 64) - NPY_PRAGMA_VECTOR - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } else { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } + if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && + DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + NPY_PRAGMA_VECTOR + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } else { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } i = blocked_end; } @@ -1294,19 +1433,19 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp @type@ *ip2_shifted = ip2 + peel; @type@ *op1_shifted = op1 + peel; - if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && - DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - NPY_PRAGMA_VECTOR - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } else { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } + if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && + DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + NPY_PRAGMA_VECTOR + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } else { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } i = blocked_end; } @@ -1462,19 +1601,19 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp @type@ *ip2_shifted = ip2 + peel; @type@ *op1_shifted = op1 + peel; - if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && - DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - NPY_PRAGMA_VECTOR - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } else { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } + if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && + DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + NPY_PRAGMA_VECTOR + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } else { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } i = blocked_end; } @@ -1619,37 +1758,37 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp const npy_intp blocked_end = npy_blocked_end(peel, sizeof(@type@), vsize, n); npy_intp i; - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(i = 0; i < peel; i++) { op1[i] = ip1[i] @OP@ ip2[i]; } { npy_intp j, j_max = blocked_end - peel; - j_max &= (~0xf); - const npy_intp blocked_end = j_max + peel; + j_max &= (~0xf); + const npy_intp blocked_end = j_max + peel; if (j_max > 0) { @type@ *ip1_aligned = ip1 + peel, *op1_shifted = op1 + peel, *ip2_shifted = ip2 + peel; - if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && - DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - NPY_PRAGMA_VECTOR - for(j = 0; j < j_max; j++) { - op1_shifted[j] = 
ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } else { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } + if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && + DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + NPY_PRAGMA_VECTOR + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } else { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } i = blocked_end; } } - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(; i < n; i++) { op1[i] = ip1[i] @OP@ ip2[i]; } @@ -1665,7 +1804,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp npy_intp i; const @type@ ip1c = ip1[0]; - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(i = 0; i < peel; i++) { op1[i] = ip1c @OP@ ip2[i]; } @@ -1684,7 +1823,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp } } - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(; i < n; i++) { op1[i] = ip1c @OP@ ip2[i]; } @@ -1699,7 +1838,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp npy_intp i; const @type@ ip2c = ip2[0]; - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(i = 0; i < peel; i++) { op1[i] = ip1[i] @OP@ ip2c; } @@ -1718,7 +1857,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp } } - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(; i < n; i++) { op1[i] = ip1[i] @OP@ ip2c; } @@ -2147,13 +2286,13 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n, for (i = 8; i < n - (n % 8); i += 8) { /* small blocksizes seems to mess with hardware prefetch */ NPY_PREFETCH(a + (i + 512 /(npy_intp)sizeof(@ftype@))*stride, 0, 3); - r[0] += *((@ftype@ *)(a + (i + 0) * stride)); + r[0] += *((@ftype@ *)(a + (i + 0) * stride)); r[1] += *((@ftype@ *)(a + (i + 0) * stride + sizeof(@ftype@))); - r[2] += *((@ftype@ *)(a + (i + 2) * stride)); + r[2] += *((@ftype@ *)(a + (i + 2) * stride)); r[3] += *((@ftype@ *)(a + (i + 2) * stride + sizeof(@ftype@))); - r[4] += *((@ftype@ *)(a + (i + 4) * stride)); + r[4] += *((@ftype@ *)(a + (i + 4) * stride)); r[5] += *((@ftype@ *)(a + (i + 4) * stride + sizeof(@ftype@))); - r[6] += *((@ftype@ *)(a + (i + 6) * stride)); + r[6] += *((@ftype@ *)(a + (i + 6) * stride)); r[7] += *((@ftype@ *)(a + (i + 6) * stride + sizeof(@ftype@))); } From 9d1e3a3877961a4fd9685ff852909077131beb77 Mon Sep 17 00:00:00 2001 From: "Komarova, Evseniia" Date: Tue, 11 Jun 2024 20:09:11 +0200 Subject: [PATCH 18/38] add c99 standard --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6fc047f..4f5990c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,7 @@ set(_linker_options ${MKL_UMATH_LINKER_OPTIONS}) set(_trgt mkl_umath_loops) add_library(${_trgt} SHARED "mkl_umath/src/mkl_umath_loops.c") set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) +set_target_properties(${_trgt} PROPERTIES C_STANDARD 99) target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) target_link_libraries(${_trgt} PRIVATE mkl_rt ${Python_LIBRARIES}) target_link_options(${_trgt} PRIVATE ${_linker_options}) From e26ba4d7a08a3f5c15419665d11342ecebb9b4f5 Mon Sep 17 00:00:00 2001 From: "Komarova, Evseniia" Date: Wed, 12 Jun 2024 15:53:08 +0200 Subject: [PATCH 19/38] add high precision flags 
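
Illustrative summary of the flags being added (see the diff below for the
authoritative list): on Linux the icx compile line for mkl_umath_loops gains
roughly

    -prec-sqrt -fprotect-parens -fimf-precision=high -fp-model fast=2

with the MSVC-style equivalents (/Qprec-sqrt, /Qprotect-parens,
/Qimf-precision=high, /fp:fast=2) used on Windows; the exact flag set is
assembled into CMAKE_C_FLAGS.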
--- CMakeLists.txt | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f5990c..8654308 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,13 @@ if(WIN32) "/GS " "/DynamicBase " ) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS}") + string(CONCAT PRECISION_FLAGS + "/fp:fast=2 " + "/Qimf-precision=high " + "/Qprec-sqrt " + "/Qprotect-parens " + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS} ${PRECISION_FLAGS}") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG" ) @@ -73,7 +79,13 @@ elseif(UNIX) "${WARNING_FLAGS}" "${SDL_FLAGS}" ) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 ${CFLAGS}") + string(CONCAT PRECISION_FLAGS + "-prec-sqrt " + "-fprotect-parens " + "-fimf-precision=high " + "-fp-model fast=2 " + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 ${CFLAGS} ${PRECISION_FLAGS}") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${CFLAGS} -O0 -g1 -DDEBUG" ) @@ -90,8 +102,10 @@ set(_linker_options ${MKL_UMATH_LINKER_OPTIONS}) set(_trgt mkl_umath_loops) add_library(${_trgt} SHARED "mkl_umath/src/mkl_umath_loops.c") -set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) -set_target_properties(${_trgt} PROPERTIES C_STANDARD 99) +set_target_properties(${_trgt} PROPERTIES + CMAKE_POSITION_INDEPENDENT_CODE ON + C_STANDARD 99 +) target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) target_link_libraries(${_trgt} PRIVATE mkl_rt ${Python_LIBRARIES}) target_link_options(${_trgt} PRIVATE ${_linker_options}) From 2d2448099999285735f059126066a0d10c5395a8 Mon Sep 17 00:00:00 2001 From: "Komarova, Evseniia" Date: Mon, 17 Jun 2024 12:41:25 +0200 Subject: [PATCH 20/38] replace test_basic with pytest --- conda-recipe/meta.yaml | 4 +++- mkl_umath/tests/test_basic.py | 37 +++++++++++++++++------------------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 9f98829..ec41497 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -37,10 +37,12 @@ requirements: - {{ pin_compatible('numpy') }} test: + requires: + - pytest source_files: - mkl_umath/tests/test_basic.py commands: - - python mkl_umath/tests/test_basic.py + - pytest mkl_umath/tests/test_basic.py imports: - mkl_umath - mkl_umath._ufuncs diff --git a/mkl_umath/tests/test_basic.py b/mkl_umath/tests/test_basic.py index 1a9fc53..f0b4ae8 100644 --- a/mkl_umath/tests/test_basic.py +++ b/mkl_umath/tests/test_basic.py @@ -23,6 +23,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import pytest import numpy as np import mkl_umath._ufuncs as mu import numpy.core.umath as nu @@ -49,11 +50,8 @@ def get_args(args_str): return tuple(args) umaths = [i for i in dir(mu) if isinstance(getattr(mu, i), np.ufunc)] - umaths.remove('arccosh') # expects input greater than 1 -# dictionary with test cases -# (umath, types) : args generated_cases = {} for umath in umaths: mkl_umath = getattr(mu, umath) @@ -64,29 +62,30 @@ def get_args(args_str): generated_cases[(umath, type)] = args additional_cases = { -('arccosh', 'f->f') : (np.single(np.random.random_sample() + 1),), -('arccosh', 'd->d') : (np.double(np.random.random_sample() + 1),), + ('arccosh', 'f->f'): (np.single(np.random.random_sample() + 1),), + ('arccosh', 'd->d'): (np.double(np.random.random_sample() + 1),), } -test_cases = {} -for d in (generated_cases, additional_cases): - test_cases.update(d) +test_cases = {**generated_cases, **additional_cases} -for case in test_cases: - umath = case[0] - type = case[1] +@pytest.mark.parametrize("case", list(test_cases.keys())) +def test_umath(case): + umath, type = case args = test_cases[case] mkl_umath = getattr(mu, umath) np_umath = getattr(nu, umath) print('*'*80) - print(umath, type) - print("args", args) + print(f"Testing {umath} with type {type}") + print("args:", args) + mkl_res = mkl_umath(*args) np_res = np_umath(*args) - print("mkl res", mkl_res) - print("npy res", np_res) - - assert np.allclose(mkl_res, np_res) + + print("mkl res:", mkl_res) + print("npy res:", np_res) + + assert np.allclose(mkl_res, np_res), f"Results for {umath} do not match" -print("Test cases count:", len(test_cases)) -print("All looks good!") +def test_cases_count(): + print("Test cases count:", len(test_cases)) + assert len(test_cases) > 0, "No test cases found" From 372bf68e998d0394c2b27f82a2110fdeba52fa8e Mon Sep 17 00:00:00 2001 From: "Komarova, Evseniia" Date: Tue, 18 Jun 2024 17:22:19 +0200 Subject: [PATCH 21/38] convert the generated integer to numpy.int64 using type --- mkl_umath/tests/test_basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkl_umath/tests/test_basic.py b/mkl_umath/tests/test_basic.py index f0b4ae8..88770a9 100644 --- a/mkl_umath/tests/test_basic.py +++ b/mkl_umath/tests/test_basic.py @@ -44,7 +44,7 @@ def get_args(args_str): elif s == 'i': args.append(np.int_(np.random.randint(low=1, high=10))) elif s == 'l': - args.append(np.longlong(np.random.randint(low=1, high=10))) + args.append(np.dtype('long').type(np.random.randint(low=1, high=10))) else: raise ValueError("Unexpected type specified!") return tuple(args) From 2cc4dd62d5fc3bac3233db3bd9d750684ca3637d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 11 Sep 2024 13:07:02 -0700 Subject: [PATCH 22/38] Changes to enable compilation with NumPy 2 --- CMakeLists.txt | 18 +++++++++--------- mkl_umath/src/mkl_umath_loops.c.src | 1 + mkl_umath/src/ufuncsmodule.h | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8654308..676daae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,8 +20,6 @@ find_package(Cython REQUIRED) set(MKL_ARCH intel64) set(MKL_LINK sdl) -set(MKL_THREADING intel_thread) -set(MKL_INTERFACE ilp64) find_package(MKL REQUIRED) if(WIN32) @@ -101,16 +99,16 @@ set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) set(_linker_options ${MKL_UMATH_LINKER_OPTIONS}) set(_trgt mkl_umath_loops) -add_library(${_trgt} SHARED "mkl_umath/src/mkl_umath_loops.c") +add_library(${_trgt} SHARED mkl_umath/src/mkl_umath_loops.c) 
set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON C_STANDARD 99 ) -target_include_directories(${_trgt} PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) -target_link_libraries(${_trgt} PRIVATE mkl_rt ${Python_LIBRARIES}) -target_link_options(${_trgt} PRIVATE ${_linker_options}) -target_compile_options(${_trgt} PRIVATE -fveclib=SVML) -target_compile_options(${_trgt} PRIVATE -fvectorize) +target_include_directories(${_trgt} PUBLIC mkl_umath/src/ ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) +target_link_libraries(${_trgt} PUBLIC MKL::MKL ${Python_LIBRARIES}) +target_link_options(${_trgt} PUBLIC ${_linker_options}) +target_compile_options(${_trgt} PUBLIC -fveclib=SVML) +target_compile_options(${_trgt} PUBLIC -fvectorize) if(OPTIMIZATION_REPORT) target_compile_options(${_trgt} PRIVATE -qopt-report=3) endif() @@ -120,11 +118,12 @@ install(TARGETS ${_trgt} RUNTIME DESTINATION mkl_umath ) -Python_add_library(_ufuncs MODULE WITH_SOABI "mkl_umath/src/ufuncsmodule.c" "mkl_umath/src/__umath_generated.c") +python_add_library(_ufuncs MODULE WITH_SOABI "mkl_umath/src/ufuncsmodule.c" "mkl_umath/src/__umath_generated.c") target_include_directories(_ufuncs PRIVATE "mkl_umath/src" ${NumPy_INCLUDE_DIR} ${MKL_INCLUDE_DIR}) target_compile_definitions(_ufuncs PUBLIC NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) target_link_options(_ufuncs PRIVATE ${_linker_options}) target_link_libraries(_ufuncs PRIVATE mkl_umath_loops) +set_target_properties(_ufuncs PROPERTIES C_STANDARD 99) if (UNIX) set_target_properties(_ufuncs PROPERTIES INSTALL_RPATH "$ORIGIN") endif() @@ -135,6 +134,7 @@ Python_add_library(_patch MODULE WITH_SOABI ${_generated_src}) target_include_directories(_patch PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) target_compile_definitions(_patch PUBLIC NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) target_link_libraries(_patch PRIVATE mkl_umath_loops) +set_target_properties(_patch PROPERTIES C_STANDARD 99) if (UNIX) set_target_properties(_patch PROPERTIES INSTALL_RPATH "$ORIGIN") endif() diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index 50fc7ea..92d62c6 100644 --- a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -32,6 +32,7 @@ #include "Python.h" #define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define NP_IMPORT_ARRAY #include "numpy/npy_common.h" #include "numpy/ndarraytypes.h" diff --git a/mkl_umath/src/ufuncsmodule.h b/mkl_umath/src/ufuncsmodule.h index 2526763..acb6bbd 100644 --- a/mkl_umath/src/ufuncsmodule.h +++ b/mkl_umath/src/ufuncsmodule.h @@ -25,6 +25,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "Python.h" +#define PY_ARRAY_UNIQUE_SYMBOL mkl_umath_ufunc_ext #include "numpy/arrayobject.h" #include "numpy/ndarraytypes.h" #include "numpy/ufuncobject.h" From 3377d38bf6a1a1f103959190f96f0437325e00d6 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 11 Sep 2024 17:52:38 -0700 Subject: [PATCH 23/38] Provide w/a for ICC and recent libmmd library --- mkl_umath/src/mkl_umath_loops.c.src | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index 92d62c6..86a62c4 100644 --- a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -154,6 +154,11 @@ static inline npy_float spacingf(npy_float x) { return copysignf(nextafterf(fabsf(x), INFINITY), x) - x; } +#if defined(_MSC_VER) && defined(__INTEL_COMPILER) +extern __inline float __cdecl ldexpf( float _X, int _Y) { + return (float)ldexp(_X, _Y); +} +#endif /**begin repeat * Float types From ea9ef2246481d8eb039d3bea6c5af2bc7df8c57e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 17 Sep 2024 08:59:03 -0500 Subject: [PATCH 24/38] Find NumPy as Python component Adjust variables after move to use NumPy as Python component --- CMakeLists.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 676daae..a4e3533 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,13 +12,17 @@ option(OPTIMIZATION_REPORT OFF ) -find_package(Python COMPONENTS Interpreter Development REQUIRED) -find_package(NumPy REQUIRED) +find_package(Python COMPONENTS Interpreter Development NumPy REQUIRED) + +# Print out the discovered paths +include(CMakePrintHelpers) +cmake_print_variables(Python_INCLUDE_DIRS) +cmake_print_variables(Python_LIBRARIES) +cmake_print_variables(Python_NumPy_INCLUDE_DIRS) set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"") find_package(Cython REQUIRED) -set(MKL_ARCH intel64) set(MKL_LINK sdl) find_package(MKL REQUIRED) @@ -104,7 +108,7 @@ set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON C_STANDARD 99 ) -target_include_directories(${_trgt} PUBLIC mkl_umath/src/ ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) +target_include_directories(${_trgt} PUBLIC mkl_umath/src/ ${Python_NumPy_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}) target_link_libraries(${_trgt} PUBLIC MKL::MKL ${Python_LIBRARIES}) target_link_options(${_trgt} PUBLIC ${_linker_options}) target_compile_options(${_trgt} PUBLIC -fveclib=SVML) @@ -119,7 +123,7 @@ install(TARGETS ${_trgt} ) python_add_library(_ufuncs MODULE WITH_SOABI "mkl_umath/src/ufuncsmodule.c" "mkl_umath/src/__umath_generated.c") -target_include_directories(_ufuncs PRIVATE "mkl_umath/src" ${NumPy_INCLUDE_DIR} ${MKL_INCLUDE_DIR}) +target_include_directories(_ufuncs PRIVATE "mkl_umath/src" ${Python_NumPy_INCLUDE_DIRS} ${MKL_INCLUDE_DIR}) target_compile_definitions(_ufuncs PUBLIC NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) target_link_options(_ufuncs PRIVATE ${_linker_options}) target_link_libraries(_ufuncs PRIVATE mkl_umath_loops) @@ -131,7 +135,7 @@ install(TARGETS _ufuncs LIBRARY DESTINATION mkl_umath) add_cython_target(_patch "mkl_umath/src/_patch.pyx" C OUTPUT_VAR _generated_src) Python_add_library(_patch MODULE WITH_SOABI ${_generated_src}) -target_include_directories(_patch PRIVATE "mkl_umath/src/" ${NumPy_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) +target_include_directories(_patch PRIVATE "mkl_umath/src/" ${Python_NumPy_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}) target_compile_definitions(_patch PUBLIC 
NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) target_link_libraries(_patch PRIVATE mkl_umath_loops) set_target_properties(_patch PROPERTIES C_STANDARD 99) From 8fcf5e4ae4318ce333a2bf84a2b5c99a0af6bfac Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 17 Sep 2024 09:53:18 -0500 Subject: [PATCH 25/38] _patch is to use language_level=3 --- mkl_umath/src/_patch.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkl_umath/src/_patch.pyx b/mkl_umath/src/_patch.pyx index 5814d54..fd78f8d 100644 --- a/mkl_umath/src/_patch.pyx +++ b/mkl_umath/src/_patch.pyx @@ -24,7 +24,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # distutils: language = c -# cython: language_level=2 +# cython: language_level=3 import mkl_umath._ufuncs as mu import numpy.core.umath as nu From 826a3308eec9ad2dd65f6dc14ae7c9549b92acd2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 18 Sep 2024 08:05:19 -0500 Subject: [PATCH 26/38] Replace use of -c intel channel, replace use of -c main Introduce conda-recipe-cf which does not depend on numpy-base. Use it in conda-packages workflow to enable building for wider range of Python versions than what is included in IDP. --- .github/workflows/conda-package.yml | 18 +++++----- conda-recipe-cf/bld.bat | 25 +++++++++++++ conda-recipe-cf/build.sh | 23 ++++++++++++ conda-recipe-cf/meta.yaml | 54 +++++++++++++++++++++++++++++ conda-recipe-cf/run_tests.bat | 1 + conda-recipe-cf/run_tests.sh | 1 + conda-recipe/meta.yaml | 2 +- 7 files changed, 114 insertions(+), 10 deletions(-) create mode 100644 conda-recipe-cf/bld.bat create mode 100644 conda-recipe-cf/build.sh create mode 100644 conda-recipe-cf/meta.yaml create mode 100644 conda-recipe-cf/run_tests.bat create mode 100644 conda-recipe-cf/run_tests.sh diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index b890920..0eefa64 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ['3.10'] + python: ['3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v3 with: @@ -38,7 +38,7 @@ jobs: run: conda install conda-build - name: Build conda package run: | - CHANNELS="-c conda-forge -c intel --override-channels" + CHANNELS="-c conda-forge -c https://software.repos.intel.com/python/conda --override-channels" VERSIONS="--python ${{ matrix.python }}" TEST="--no-test" @@ -46,7 +46,7 @@ jobs: $TEST \ $VERSIONS \ $CHANNELS \ - conda-recipe + conda-recipe-cf - name: Upload artifact uses: actions/upload-artifact@v3 with: @@ -59,12 +59,12 @@ jobs: strategy: matrix: - python: ['3.10'] + python: ['3.10', '3.11', '3.12'] experimental: [false] runner: [ubuntu-latest] continue-on-error: ${{ matrix.experimental }} env: - CHANNELS: -c intel -c main --override-channels + CHANNELS: -c conda-forge -c https://software.repos.intel.com/python/conda --override-channels steps: - name: Download artifact @@ -121,7 +121,7 @@ jobs: strategy: matrix: - python: ['3.10'] + python: ['3.10', '3.11', '3.12'] env: conda-bld: C:\Miniconda\conda-bld\win-64\ steps: @@ -147,7 +147,7 @@ jobs: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - name: Build conda package - run: conda build --no-test --python ${{ matrix.python }} -c intel -c conda-forge --override-channels conda-recipe + run: conda build --no-test --python ${{ matrix.python }} -c conda-forge -c https://software.repos.intel.com/python/conda 
--override-channels conda-recipe-cf - name: Upload artifact uses: actions/upload-artifact@v3 with: @@ -162,13 +162,13 @@ jobs: shell: cmd /C CALL {0} strategy: matrix: - python: ['3.10'] + python: ['3.10', '3.11', '3.12'] experimental: [false] runner: [windows-latest] continue-on-error: ${{ matrix.experimental }} env: workdir: '${{ github.workspace }}' - CHANNELS: -c intel -c conda-forge --override-channels + CHANNELS: -c conda-forge -c https://software.repos.intel.com/python/conda --override-channels steps: - name: Download artifact diff --git a/conda-recipe-cf/bld.bat b/conda-recipe-cf/bld.bat new file mode 100644 index 0000000..e27318d --- /dev/null +++ b/conda-recipe-cf/bld.bat @@ -0,0 +1,25 @@ +REM A workaround for activate-dpcpp.bat issue to be addressed in 2021.4 +set "LIB=%BUILD_PREFIX%\Library\lib;%BUILD_PREFIX%\compiler\lib;%LIB%" +set "INCLUDE=%BUILD_PREFIX%\include;%INCLUDE%" + +"%PYTHON%" setup.py clean --all +set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16 17.0.0 17) DO @( + REM set DIR_HINT if directory exists + IF EXIST "%BUILD_PREFIX%\Library\lib\clang\%%V\" ( + SET "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\%%V" + ) +) + +if NOT "%WHEELS_OUTPUT_FOLDER%"=="" ( + rem Install and assemble wheel package from the build bits + "%PYTHON%" setup.py install bdist_wheel %SKBUILD_ARGS% + if errorlevel 1 exit 1 + copy dist\mkl_umath*.whl %WHEELS_OUTPUT_FOLDER% + if errorlevel 1 exit 1 +) ELSE ( + rem Only install + "%PYTHON%" setup.py install %SKBUILD_ARGS% + if errorlevel 1 exit 1 +) diff --git a/conda-recipe-cf/build.sh b/conda-recipe-cf/build.sh new file mode 100644 index 0000000..fc4459c --- /dev/null +++ b/conda-recipe-cf/build.sh @@ -0,0 +1,23 @@ +# This is necessary to help DPC++ find Intel libraries such as SVML, IRNG, etc in build prefix +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${BUILD_PREFIX}/lib" + +# Intel LLVM must cooperate with compiler and sysroot from conda +echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icx_for_conda.cfg +export ICXCFG="$(pwd)/icx_for_conda.cfg" + +export CMAKE_GENERATOR="Ninja" +SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then + # Install packages and assemble wheel package from built bits + if [ "$CONDA_PY" == "36" ]; then + WHEELS_BUILD_ARGS="-p manylinux1_x86_64" + else + WHEELS_BUILD_ARGS="-p manylinux2014_x86_64" + fi + ${PYTHON} setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS} + cp dist/mkl_umath*.whl ${WHEELS_OUTPUT_FOLDER} +else + # Perform regular install + ${PYTHON} setup.py install ${SKBUILD_ARGS} +fi diff --git a/conda-recipe-cf/meta.yaml b/conda-recipe-cf/meta.yaml new file mode 100644 index 0000000..4ecf657 --- /dev/null +++ b/conda-recipe-cf/meta.yaml @@ -0,0 +1,54 @@ +{% set version = "0.1.2" %} +{% set buildnumber = 0 %} + +package: + name: mkl_umath + version: {{ version }} + +source: + path: ../ + +build: + number: {{ buildnumber }} + ignore_run_exports: + - blas + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - {{ compiler('dpcpp') }} >=2024.2 # [not osx] + - sysroot_linux-64 >=2.28 # [linux] + host: + - setuptools + - cmake + - ninja + - git + - cython + - scikit-build + - python + - mkl-devel + - numpy + run: + - python + - mkl + - mkl-service + - {{ pin_compatible('intel-cmplr-lib-rt') }} + +test: + requires: + - pytest + source_files: + - 
mkl_umath/tests/test_basic.py + commands: + - pytest mkl_umath/tests/test_basic.py + imports: + - mkl_umath + - mkl_umath._ufuncs + - mkl_umath._patch + +about: + home: http://github.com/IntelPython/mkl_umath + license: BSD-3 + license_file: LICENSE.txt + summary: Universal functions for real and complex floating point arrays powered by Intel(R) Math Kernel Library Vector (Intel(R) MKL) and Intel(R) Short Vector Math Library (Intel(R) SVML) diff --git a/conda-recipe-cf/run_tests.bat b/conda-recipe-cf/run_tests.bat new file mode 100644 index 0000000..590db89 --- /dev/null +++ b/conda-recipe-cf/run_tests.bat @@ -0,0 +1 @@ +%PYTHON% tests\test_basic.py \ No newline at end of file diff --git a/conda-recipe-cf/run_tests.sh b/conda-recipe-cf/run_tests.sh new file mode 100644 index 0000000..7bfca5d --- /dev/null +++ b/conda-recipe-cf/run_tests.sh @@ -0,0 +1 @@ +$PYTHON tests/test_basic.py diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index ec41497..dcafd45 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -17,7 +17,7 @@ requirements: build: - {{ compiler('c') }} - {{ compiler('cxx') }} - - {{ compiler('dpcpp') }} >=2023.2 # [not osx] + - {{ compiler('dpcpp') }} >=2024.2 # [not osx] - sysroot_linux-64 >=2.28 # [linux] host: - setuptools From aa3876fc4616e0534d3e406edb5484e031e02737 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 18 Sep 2024 09:08:49 -0500 Subject: [PATCH 27/38] Fix issue with Windows build/test steps --- .github/workflows/conda-package.yml | 56 ++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 0eefa64..4d81eb2 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -20,6 +20,7 @@ jobs: - name: Set pkgs_dirs run: | echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc + - name: Cache conda packages uses: actions/cache@v3 env: @@ -34,24 +35,28 @@ jobs: - name: Add conda to system path run: echo $CONDA/bin >> $GITHUB_PATH + - name: Install conda-build run: conda install conda-build + - name: Build conda package run: | CHANNELS="-c conda-forge -c https://software.repos.intel.com/python/conda --override-channels" VERSIONS="--python ${{ matrix.python }}" TEST="--no-test" + echo "CONDA_BLD=${CONDA}/conda-bld/linux-64" >> $GITHUB_ENV conda build \ $TEST \ $VERSIONS \ $CHANNELS \ conda-recipe-cf + - name: Upload artifact uses: actions/upload-artifact@v3 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - path: /usr/share/miniconda/conda-bld/linux-64/${{ env.PACKAGE_NAME }}-*.tar.bz2 + path: ${{ env.CONDA_BLD }}/${{ env.PACKAGE_NAME }}-*.tar.bz2 test: needs: build @@ -89,9 +94,11 @@ jobs: conda create -n test_mkl_umath $PACKAGE_NAME python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile - name: Display lockfile run: cat lockfile + - name: Set pkgs_dirs run: | echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc + - name: Cache conda packages uses: actions/cache@v3 env: @@ -110,6 +117,7 @@ jobs: conda create -n test_mkl_umath python=${{ matrix.python }} $PACKAGE_NAME pytest $CHANNELS # Test installed packages conda list -n test_mkl_umath + - name: Run tests run: | source $CONDA/etc/profile.d/conda.sh @@ -128,11 +136,13 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: 0 - - uses: conda-incubator/setup-miniconda@v2 + + - uses: conda-incubator/setup-miniconda@v3 with: - auto-activate-base: true - conda-build-version: "*" - activate-environment: true + 
miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: build + channels: conda-forge python-version: ${{ matrix.python }} - name: Cache conda packages @@ -146,8 +156,23 @@ jobs: restore-keys: | ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + + - name: Store conda paths as envs + shell: bash -l {0} + run: | + echo "CONDA_BLD=$CONDA/conda-bld/win-64/" | tr "\\\\" '/' >> $GITHUB_ENV + + - name: Install conda build + run: | + conda activate + conda install -y conda-build + conda list -n base + - name: Build conda package - run: conda build --no-test --python ${{ matrix.python }} -c conda-forge -c https://software.repos.intel.com/python/conda --override-channels conda-recipe-cf + run: | + conda activate + conda build --no-test --python ${{ matrix.python }} -c conda-forge -c https://software.repos.intel.com/python/conda --override-channels conda-recipe-cf + - name: Upload artifact uses: actions/upload-artifact@v3 with: @@ -175,13 +200,14 @@ jobs: uses: actions/download-artifact@v3 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: - auto-update-conda: true - conda-build-version: '*' - miniconda-version: 'latest' - activate-environment: mkl_umath_test + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: build + channels: conda-forge python-version: ${{ matrix.python }} + - name: Create conda channel with the artifact bit shell: cmd /C CALL {0} run: | @@ -189,13 +215,17 @@ jobs: mkdir ${{ env.workdir }}\channel\win-64 move ${{ env.PACKAGE_NAME }}-*.tar.bz2 ${{ env.workdir }}\channel\win-64 dir ${{ env.workdir }}\channel\win-64 + - name: Index the channel shell: cmd /C CALL {0} - run: conda index ${{ env.workdir }}\channel + run: | + conda activate + conda index ${{ env.workdir }}\channel - name: Dump mkl_umath version info from created channel into ver.json shell: cmd /C CALL {0} run: | + conda activate conda search ${{ env.PACKAGE_NAME }} -c ${{ env.workdir }}/channel --override-channels --info --json > ${{ env.workdir }}\ver.json - name: Output content of produced ver.json shell: pwsh @@ -210,6 +240,7 @@ jobs: FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( SET PACKAGE_VERSION=%%F ) + conda activate conda install -n mkl_umath_test ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% python=${{ matrix.python }} -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} --only-deps --dry-run > lockfile - name: Display lockfile content shell: pwsh @@ -241,6 +272,7 @@ jobs: - name: Report content of test environment shell: cmd /C CALL {0} run: | + conda activate echo "Value of CONDA enviroment variable was: " %CONDA% echo "Value of CONDA_PREFIX enviroment variable was: " %CONDA_PREFIX% conda info && conda list -n mkl_umath_test From 3025b1bad9094e2142c3d06fa014a5295dabb4f5 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 18 Sep 2024 15:15:02 -0500 Subject: [PATCH 28/38] Use Windows-2019 container over windows-latest --- .github/workflows/conda-package.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 4d81eb2..a4ac110 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -125,7 +125,7 @@ jobs: python -c "import mkl_umath, numpy as np; mkl_umath.use_in_numpy(); np.sin(np.linspace(0, 1, 
num=10**6));" build_windows: - runs-on: windows-latest + runs-on: windows-2019 strategy: matrix: @@ -189,7 +189,7 @@ jobs: matrix: python: ['3.10', '3.11', '3.12'] experimental: [false] - runner: [windows-latest] + runner: [windows-2019] continue-on-error: ${{ matrix.experimental }} env: workdir: '${{ github.workspace }}' From a98eae4c72c42911f87d8530f1a2818eca196461 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 23 Sep 2024 15:20:22 -0500 Subject: [PATCH 29/38] Add CODEOWNERS file --- .github/CODEOWNERS | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..05c1669 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @oleksandr-pavlyk @xaleryb @ekomarova From 2b584cf29114d7d704911a37fd8c642c4e2b3e15 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 23 Sep 2024 15:20:32 -0500 Subject: [PATCH 30/38] Add dependabot file --- .github/dependabot.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..5ace460 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" From 8ff6665d087c77e72c9c012cc7ded482357e8689 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 23 Sep 2024 15:20:47 -0500 Subject: [PATCH 31/38] Add OpenSSF scorecard workflow --- .github/workflows/openssf-scorecard.yml | 74 +++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 .github/workflows/openssf-scorecard.yml diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml new file mode 100644 index 0000000..586f7bc --- /dev/null +++ b/.github/workflows/openssf-scorecard.yml @@ -0,0 +1,74 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '28 2 * * 1' + - cron: '28 2 * * 4' + push: + branches: [ "master" ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. 
Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: SARIF file + path: results.sarif + retention-days: 14 + + # Upload the results to GitHub's code scanning dashboard. + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@294a9d92911152fe08befb9ec03e240add280cb3 # v3.26.8 + with: + sarif_file: results.sarif From a2d618299e1afdd3276d5fec2dff8fecc8a409cb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 23 Sep 2024 16:14:01 -0500 Subject: [PATCH 32/38] Fixed upload of Windows build artifact --- .github/workflows/conda-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index a4ac110..6f96bfa 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -177,7 +177,7 @@ jobs: uses: actions/upload-artifact@v3 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - path: ${{ env.conda-bld }}${{ env.PACKAGE_NAME }}-*.tar.bz2 + path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 test_windows: needs: build_windows From 5270b7c9ad6258616688273a9e56081fb0b7d16d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 23 Sep 2024 16:46:05 -0500 Subject: [PATCH 33/38] Add a step to output content of workdir --- .github/workflows/conda-package.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 6f96bfa..f4a7206 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -222,11 +222,19 @@ jobs: conda activate conda index ${{ env.workdir }}\channel + - name: Dump mkl_umath version info from created channel to STDOUT + shell: cmd /C CALL {0} + run: | + conda activate + conda search ${{ env.PACKAGE_NAME }} -c ${{ env.workdir }}/channel --override-channels --info --json - name: Dump mkl_umath version info from created channel into ver.json shell: cmd /C CALL {0} run: | conda activate conda search ${{ env.PACKAGE_NAME }} -c ${{ env.workdir }}/channel --override-channels --info --json > ${{ env.workdir }}\ver.json + - name: Output content of workdir + shell: pwsh + run: Get-ChildItem -Path ${{ env.workdir }} - name: Output content of produced ver.json shell: pwsh run: Get-Content -Path ${{ env.workdir }}\ver.json From ff3bc26a698b1b0f65bc4469c635d33b7a675d15 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 23 Sep 2024 17:24:58 -0500 Subject: [PATCH 34/38] Add SECURITY.md file --- SECURITY.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create 
mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..556938b --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,12 @@ +# Security Policy + +## Report a Vulnerability + +Please report security issues or vulnerabilities to the [Intel® Security Center]. + +For more information on how Intel® works to resolve security issues, see +[Vulnerability Handling Guidelines]. + +[Intel® Security Center]:https://www.intel.com/content/www/us/en/security-center/default.html + +[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html From 535e68a37ad4ba7d882ad7198d0f7dee324daf2f Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 24 Sep 2024 07:24:36 -0500 Subject: [PATCH 35/38] Attempt to fix test step for Windows --- .github/workflows/conda-package.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index f4a7206..6a25b70 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -200,11 +200,14 @@ jobs: uses: actions/download-artifact@v3 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + - uses: conda-incubator/setup-miniconda@v3 with: + auto-update-conda: true + conda-build-version: '*' miniforge-variant: Miniforge3 miniforge-version: latest - activate-environment: build + activate-environment: mkl_umath_test channels: conda-forge python-version: ${{ matrix.python }} @@ -219,18 +222,15 @@ jobs: - name: Index the channel shell: cmd /C CALL {0} run: | - conda activate conda index ${{ env.workdir }}\channel - name: Dump mkl_umath version info from created channel to STDOUT shell: cmd /C CALL {0} run: | - conda activate conda search ${{ env.PACKAGE_NAME }} -c ${{ env.workdir }}/channel --override-channels --info --json - name: Dump mkl_umath version info from created channel into ver.json shell: cmd /C CALL {0} run: | - conda activate conda search ${{ env.PACKAGE_NAME }} -c ${{ env.workdir }}/channel --override-channels --info --json > ${{ env.workdir }}\ver.json - name: Output content of workdir shell: pwsh @@ -248,7 +248,6 @@ jobs: FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( SET PACKAGE_VERSION=%%F ) - conda activate conda install -n mkl_umath_test ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% python=${{ matrix.python }} -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} --only-deps --dry-run > lockfile - name: Display lockfile content shell: pwsh From 030e1620b0d567502ce8ebc3be4d93f148b09644 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 24 Sep 2024 08:22:49 -0500 Subject: [PATCH 36/38] Replace use of "-c intel" in the README. --- README.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 006aa88..0e2bd0b 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Patches were factored out per community feedback ([NEP-36](https://numpy.org/nep as a stand-alone package. It can be installed into conda environment using ``` - conda install -c intel mkl_umath + conda install -c https://software.repos.intel.com/python/conda mkl_umath ``` --- @@ -18,17 +18,9 @@ as a stand-alone package. 
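Once installed (see the commands below), the MKL-backed loops can be swapped into NumPy at runtime via `mkl_umath.use_in_numpy()`. A minimal sketch, mirroring the smoke test used in this repository's CI workflow:

```python
import numpy as np
import mkl_umath

mkl_umath.use_in_numpy()              # route supported NumPy ufunc loops through MKL/SVML
np.sin(np.linspace(0, 1, num=10**6))  # now evaluated by the mkl_umath kernels
```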
It can be installed into conda environment using To install mkl_umath Pypi package please use following command: ``` - python -m pip install --i https://pypi.anaconda.org/intel/simple -extra-index-url https://pypi.org/simple mkl_umath + python -m pip install mkl_umath ``` -If command above installs NumPy package from the Pypi, please use following command to install Intel optimized NumPy wheel package from Anaconda Cloud: - -``` - python -m pip install --i https://pypi.anaconda.org/intel/simple -extra-index-url https://pypi.org/simple mkl_umath numpy== -``` - -Where `` should be the latest version from https://anaconda.org/intel/numpy - --- ## Building From 6c8c1dbda2cd77df3730639c7ef84c151f6bf5e7 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 24 Sep 2024 09:43:50 -0500 Subject: [PATCH 37/38] Bump up versions of actions per GH warnings --- .github/workflows/conda-package.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 6a25b70..0c5bd0b 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -13,7 +13,7 @@ jobs: matrix: python: ['3.10', '3.11', '3.12'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4.1.7 with: fetch-depth: 0 @@ -22,7 +22,7 @@ jobs: echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc - name: Cache conda packages - uses: actions/cache@v3 + uses: actions/cache@v4 env: CACHE_NUMBER: 0 # Increase to reset cache with: @@ -53,7 +53,7 @@ jobs: conda-recipe-cf - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4.4.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.CONDA_BLD }}/${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -73,7 +73,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - name: Add conda to system path @@ -100,7 +100,7 @@ jobs: echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc - name: Cache conda packages - uses: actions/cache@v3 + uses: actions/cache@v4 env: CACHE_NUMBER: 0 # Increase to reset cache with: @@ -133,7 +133,7 @@ jobs: env: conda-bld: C:\Miniconda\conda-bld\win-64\ steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4.1.7 with: fetch-depth: 0 @@ -146,7 +146,7 @@ jobs: python-version: ${{ matrix.python }} - name: Cache conda packages - uses: actions/cache@v3 + uses: actions/cache@v4 env: CACHE_NUMBER: 3 # Increase to reset cache with: @@ -174,7 +174,7 @@ jobs: conda build --no-test --python ${{ matrix.python }} -c conda-forge -c https://software.repos.intel.com/python/conda --override-channels conda-recipe-cf - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4.4.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -197,7 +197,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} @@ -253,7 +253,7 @@ jobs: shell: pwsh run: Get-Content -Path .\lockfile - name: Cache conda packages - uses: actions/cache@v3 + uses: actions/cache@v4 env: CACHE_NUMBER: 0 # Increase to reset cache with: From 6e73a4cee03ed70bbe9faf169d7c710899b6caca Mon Sep 17 00:00:00 2001 From: Oleksandr 
Pavlyk Date: Tue, 24 Sep 2024 09:46:02 -0500 Subject: [PATCH 38/38] Fix build.sh per review comment --- conda-recipe-cf/build.sh | 6 +----- conda-recipe/build.sh | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/conda-recipe-cf/build.sh b/conda-recipe-cf/build.sh index fc4459c..2792f27 100644 --- a/conda-recipe-cf/build.sh +++ b/conda-recipe-cf/build.sh @@ -10,11 +10,7 @@ SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then # Install packages and assemble wheel package from built bits - if [ "$CONDA_PY" == "36" ]; then - WHEELS_BUILD_ARGS="-p manylinux1_x86_64" - else - WHEELS_BUILD_ARGS="-p manylinux2014_x86_64" - fi + WHEELS_BUILD_ARGS="-p manylinux_${GLIBC_MAJOR}_${GLIBC_MINOR}_x86_64" ${PYTHON} setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS} cp dist/mkl_umath*.whl ${WHEELS_OUTPUT_FOLDER} else diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index fc4459c..2792f27 100644 --- a/conda-recipe/build.sh +++ b/conda-recipe/build.sh @@ -10,11 +10,7 @@ SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then # Install packages and assemble wheel package from built bits - if [ "$CONDA_PY" == "36" ]; then - WHEELS_BUILD_ARGS="-p manylinux1_x86_64" - else - WHEELS_BUILD_ARGS="-p manylinux2014_x86_64" - fi + WHEELS_BUILD_ARGS="-p manylinux_${GLIBC_MAJOR}_${GLIBC_MINOR}_x86_64" ${PYTHON} setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS} cp dist/mkl_umath*.whl ${WHEELS_OUTPUT_FOLDER} else
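# Note on the platform tag above: PEP 600 "perennial manylinux" wheels are named
# manylinux_<glibc major>_<glibc minor>_<arch> (e.g. manylinux_2_28_x86_64), which
# replaces the old CONDA_PY-based choice between manylinux1 and manylinux2014.
# GLIBC_MAJOR and GLIBC_MINOR are assumed to be exported by the surrounding
# conda-build environment.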