From 1b807949032fcfdd276e9b2c2fb5fe5f768a9bd1 Mon Sep 17 00:00:00 2001 From: Wenbing Li <10278425+wenbingl@users.noreply.github.com> Date: Wed, 4 Sep 2024 16:50:05 -0700 Subject: [PATCH] Remove OpenCV dependency from C_API mode (#800) * Remove OpenCV dependency from C_API model * fix build on Windows * switch ci build flag * try to fix the macOS build issue * more fixing * fix the macOS build issue * list jpeg source * verified on MacOS * update the pp_api too * avoid the codecs library conflicts * Add the unit tests * move the codec test * add the missing dl lib for extensions test * refine the code * a smaller fixing for Windows Python --- .pipelines/ci.yml | 6 +- .pyproject/cmdclass.py | 3 + CMakeLists.txt | 10 +- MANIFEST.in | 8 -- build.sh | 2 +- cmake/ext_imgcodecs.cmake | 131 +++++++++++++++++++++++ cmake/ext_tests.cmake | 4 + cmake/presets/ort_genai.cmake | 2 - include/ortx_utils.h | 11 ++ onnxruntime_extensions/pp_api.py | 8 ++ pyop/py_c_api.cc | 15 ++- shared/api/c_api_utils.cc | 27 ++++- shared/api/c_api_utils.hpp | 60 ++++++++--- shared/api/image_decoder.hpp | 145 ++++++++++++++++++++++++++ shared/api/image_processor.cc | 4 +- test/data/processor/image_to_numpy.py | 41 ++++++++ test/data/processor/proctest.py | 76 ++++++++++++++ test/pp_api_test/test_imgcodec.cc | 81 ++++++++++++++ 18 files changed, 589 insertions(+), 45 deletions(-) create mode 100644 cmake/ext_imgcodecs.cmake create mode 100644 shared/api/image_decoder.hpp create mode 100644 test/data/processor/image_to_numpy.py create mode 100644 test/data/processor/proctest.py create mode 100644 test/pp_api_test/test_imgcodec.cc diff --git a/.pipelines/ci.yml b/.pipelines/ci.yml index e39ef96bd..a54f1d47a 100644 --- a/.pipelines/ci.yml +++ b/.pipelines/ci.yml @@ -197,7 +197,7 @@ stages: # compiled as only one operator selected. - bash: | set -e -x -u - ./build.sh -DOCOS_ENABLE_C_API=ON + ./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF cd out/Linux/RelWithDebInfo ctest -C RelWithDebInfo --output-on-failure displayName: Build ort-extensions with API enabled and run tests @@ -281,7 +281,7 @@ stages: # compiled as only one operator selected. 
- bash: | set -e -x -u - ./build.sh -DOCOS_ENABLE_C_API=ON + ./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF cd out/Darwin/RelWithDebInfo ctest -C RelWithDebInfo --output-on-failure displayName: Build ort-extensions with API enabled and run tests @@ -431,7 +431,7 @@ stages: steps: - script: | - call .\build.bat -DOCOS_ENABLE_C_API=ON + call .\build.bat -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF cd out\Windows ctest -C RelWithDebInfo --output-on-failure displayName: Build ort-extensions with API enabled and run tests diff --git a/.pyproject/cmdclass.py b/.pyproject/cmdclass.py index 3608dfc7a..3d2d78f00 100644 --- a/.pyproject/cmdclass.py +++ b/.pyproject/cmdclass.py @@ -212,6 +212,9 @@ def build_cmake(self, extension): '-DOCOS_ENABLE_VISION=OFF'] if self.pp_api: + if not self.no_opencv: + raise RuntimeError( + "Cannot enable PP C API Python Wrapper without disabling OpenCV.") cmake_args += ['-DOCOS_ENABLE_C_API=ON'] if self.no_azure is not None: diff --git a/CMakeLists.txt b/CMakeLists.txt index 12e54a52d..cbceb4b34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -206,7 +206,6 @@ if(NOT PROJECT_IS_TOP_LEVEL AND ONNXRUNTIME_ROOT) set(_ONNXRUNTIME_EMBEDDED TRUE) endif() - if (OCOS_ENABLE_SELECTED_OPLIST OR OCOS_BUILD_PRESET) disable_all_operators() if(OCOS_ENABLE_SELECTED_OPLIST) @@ -737,9 +736,12 @@ if(OCOS_ENABLE_C_API) file(GLOB audio_TARGET_SRC "shared/api/c_api_feature_extraction.*" "shared/api/speech_*") list(APPEND _TARGET_LIB_SRC ${audio_TARGET_SRC}) endif() - if(OCOS_ENABLE_CV2) + if(OCOS_ENABLE_DLIB) + include(ext_imgcodecs) file(GLOB cv2_TARGET_SRC "shared/api/c_api_processor.*" "shared/api/image_*.*") list(APPEND _TARGET_LIB_SRC ${cv2_TARGET_SRC}) + target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR}) + target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY} ${ZLIB_LIBRARY}) endif() endif() @@ -852,8 +854,8 @@ target_link_libraries(ortcustomops PUBLIC ocos_operators) if(OCOS_BUILD_SHARED_LIB) file(GLOB shared_TARGET_SRC "shared/*.cc" "shared/*.h") if (OCOS_ENABLE_C_API) - if (NOT _HAS_TOKENIZER OR NOT OCOS_ENABLE_CV2 OR NOT OCOS_ENABLE_AUDIO) - message(FATAL_ERROR "Shared library build requires GPT2_TOKENIZER, CV2 and AUDIO to be enabled.") + if (NOT _HAS_TOKENIZER OR NOT OCOS_ENABLE_AUDIO) + message(FATAL_ERROR "Shared library build requires GPT2_TOKENIZER, AUDIO to be enabled.") endif() list(APPEND shared_TARGET_SRC "shared/extensions_c.def") else() diff --git a/MANIFEST.in b/MANIFEST.in index 43d7ac613..9d81ae414 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,11 +5,3 @@ recursive-include include *.* recursive-include operators *.* recursive-include pyop *.* recursive-include shared *.* -prune ci_build -prune docs -prune test -prune _subbuild -prune out -exclude *.bat -exclude *.yaml -exclude *.git* diff --git a/build.sh b/build.sh index a6b310ed0..ad44194b3 100755 --- a/build.sh +++ b/build.sh @@ -1,7 +1,7 @@ #!/bin/bash # The example build script to build the source in Linux-like platform -set -e -x -u +set -e -u cuda_arch='' if [[ $@ == *"DOCOS_USE_CUDA=ON"* && $@ != *"DCMAKE_CUDA_ARCHITECTURES"* ]]; then diff --git a/cmake/ext_imgcodecs.cmake b/cmake/ext_imgcodecs.cmake new file mode 100644 index 000000000..70f8adcb7 --- /dev/null +++ b/cmake/ext_imgcodecs.cmake @@ -0,0 +1,131 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +set(_IMGCODEC_ROOT_DIR ${dlib_SOURCE_DIR}/dlib/external) + +# ---------------------------------------------------------------------------- +# project libpng +# +# ---------------------------------------------------------------------------- +set (PNG_LIBRARY "libpng_static_c") +set (libPNG_SOURCE_DIR ${_IMGCODEC_ROOT_DIR}/libpng) +set (zlib_SOURCE_DIR ${_IMGCODEC_ROOT_DIR}/zlib) + +if(NOT WIN32) + find_library(M_LIBRARY + NAMES m + PATHS /usr/lib /usr/local/lib + ) + if(NOT M_LIBRARY) + message(STATUS "math lib 'libm' not found; floating point support disabled") + endif() +else() + # not needed on windows + set(M_LIBRARY "") +endif() + +set(lib_srcs + ${libPNG_SOURCE_DIR}/arm/arm_init.c + ${libPNG_SOURCE_DIR}/arm/filter_neon_intrinsics.c + ${libPNG_SOURCE_DIR}/arm/palette_neon_intrinsics.c + ${libPNG_SOURCE_DIR}//png.c + ${libPNG_SOURCE_DIR}//pngerror.c + ${libPNG_SOURCE_DIR}//pngget.c + ${libPNG_SOURCE_DIR}//pngmem.c + ${libPNG_SOURCE_DIR}//pngpread.c + ${libPNG_SOURCE_DIR}//pngread.c + ${libPNG_SOURCE_DIR}//pngrio.c + ${libPNG_SOURCE_DIR}//pngrtran.c + ${libPNG_SOURCE_DIR}//pngrutil.c + ${libPNG_SOURCE_DIR}//pngset.c + ${libPNG_SOURCE_DIR}//pngtrans.c + ${libPNG_SOURCE_DIR}//pngwio.c + ${libPNG_SOURCE_DIR}//pngwrite.c + ${libPNG_SOURCE_DIR}//pngwtran.c + ${libPNG_SOURCE_DIR}//pngwutil.c + ${zlib_SOURCE_DIR}/adler32.c + ${zlib_SOURCE_DIR}/compress.c + ${zlib_SOURCE_DIR}/crc32.c + ${zlib_SOURCE_DIR}/deflate.c + ${zlib_SOURCE_DIR}/gzclose.c + ${zlib_SOURCE_DIR}/gzlib.c + ${zlib_SOURCE_DIR}/gzread.c + ${zlib_SOURCE_DIR}/gzwrite.c + ${zlib_SOURCE_DIR}/infback.c + ${zlib_SOURCE_DIR}/inffast.c + ${zlib_SOURCE_DIR}/inflate.c + ${zlib_SOURCE_DIR}/inftrees.c + ${zlib_SOURCE_DIR}/trees.c + ${zlib_SOURCE_DIR}/uncompr.c + ${zlib_SOURCE_DIR}/zutil.c +) + +add_library(${PNG_LIBRARY} STATIC EXCLUDE_FROM_ALL ${lib_srcs}) +target_include_directories(${PNG_LIBRARY} BEFORE PRIVATE ${zlib_SOURCE_DIR}) + +if(MSVC) + target_compile_definitions(${PNG_LIBRARY} PRIVATE -D_CRT_SECURE_NO_DEPRECATE) +else() + target_compile_options(${PNG_LIBRARY} PRIVATE -Wno-deprecated-non-prototype) +endif() + +# ---------------------------------------------------------------------------- +# project libjpeg +# +# ---------------------------------------------------------------------------- +set(JPEG_LIBRARY "libjpeg_static_c") +set(libJPEG_SOURCE_DIR ${_IMGCODEC_ROOT_DIR}/libjpeg) + +set(lib_srcs + ${libJPEG_SOURCE_DIR}/jaricom.c + ${libJPEG_SOURCE_DIR}/jcapimin.c + ${libJPEG_SOURCE_DIR}/jcapistd.c + ${libJPEG_SOURCE_DIR}/jcarith.c + ${libJPEG_SOURCE_DIR}/jccoefct.c + ${libJPEG_SOURCE_DIR}/jccolor.c + ${libJPEG_SOURCE_DIR}/jcdctmgr.c + ${libJPEG_SOURCE_DIR}/jchuff.c + ${libJPEG_SOURCE_DIR}/jcinit.c + ${libJPEG_SOURCE_DIR}/jcmainct.c + ${libJPEG_SOURCE_DIR}/jcmarker.c + ${libJPEG_SOURCE_DIR}/jcmaster.c + ${libJPEG_SOURCE_DIR}/jcomapi.c + ${libJPEG_SOURCE_DIR}/jcparam.c + ${libJPEG_SOURCE_DIR}/jcprepct.c + ${libJPEG_SOURCE_DIR}/jcsample.c + ${libJPEG_SOURCE_DIR}/jdapimin.c + ${libJPEG_SOURCE_DIR}/jdapistd.c + ${libJPEG_SOURCE_DIR}/jdarith.c + ${libJPEG_SOURCE_DIR}/jdatadst.c + ${libJPEG_SOURCE_DIR}/jdatasrc.c + ${libJPEG_SOURCE_DIR}/jdcoefct.c + ${libJPEG_SOURCE_DIR}/jdcolor.c + ${libJPEG_SOURCE_DIR}/jddctmgr.c + ${libJPEG_SOURCE_DIR}/jdhuff.c + ${libJPEG_SOURCE_DIR}/jdinput.c + ${libJPEG_SOURCE_DIR}/jdmainct.c + ${libJPEG_SOURCE_DIR}/jdmarker.c + ${libJPEG_SOURCE_DIR}/jdmaster.c + ${libJPEG_SOURCE_DIR}/jdmerge.c + ${libJPEG_SOURCE_DIR}/jdpostct.c + ${libJPEG_SOURCE_DIR}/jdsample.c + ${libJPEG_SOURCE_DIR}/jerror.c + 
${libJPEG_SOURCE_DIR}/jfdctflt.c + ${libJPEG_SOURCE_DIR}/jfdctfst.c + ${libJPEG_SOURCE_DIR}/jfdctint.c + ${libJPEG_SOURCE_DIR}/jidctflt.c + ${libJPEG_SOURCE_DIR}/jidctfst.c + ${libJPEG_SOURCE_DIR}/jidctint.c + ${libJPEG_SOURCE_DIR}/jmemmgr.c + ${libJPEG_SOURCE_DIR}/jmemnobs.c + ${libJPEG_SOURCE_DIR}/jquant1.c + ${libJPEG_SOURCE_DIR}/jquant2.c + ${libJPEG_SOURCE_DIR}/jutils.c + ) +file(GLOB lib_hdrs ${libJPEG_SOURCE_DIR}/*.h) +add_library(${JPEG_LIBRARY} STATIC EXCLUDE_FROM_ALL ${lib_srcs} ${lib_hdrs}) + +if(NOT MSVC) + set_source_files_properties(jcdctmgr.c PROPERTIES COMPILE_FLAGS "-O1") +endif() +target_compile_definitions(${JPEG_LIBRARY} PRIVATE -DNO_MKTEMP) diff --git a/cmake/ext_tests.cmake b/cmake/ext_tests.cmake index 436125b81..b292279b8 100644 --- a/cmake/ext_tests.cmake +++ b/cmake/ext_tests.cmake @@ -189,6 +189,10 @@ if (OCOS_BUILD_SHARED_LIB) list(APPEND extensions_test_libraries stdc++fs -pthread) endif() + if (NOT MSVC) + list(APPEND extensions_test_libraries ${CMAKE_DL_LIBS}) + endif() + add_test_target(TARGET extensions_test TEST_SOURCES ${shared_TEST_SRC} LIBRARIES ${extensions_test_libraries} diff --git a/cmake/presets/ort_genai.cmake b/cmake/presets/ort_genai.cmake index e1ecb5e98..2ed162bf5 100644 --- a/cmake/presets/ort_genai.cmake +++ b/cmake/presets/ort_genai.cmake @@ -3,8 +3,6 @@ set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE) set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE) -set(OCOS_ENABLE_CV2 ON CACHE INTERNAL "" FORCE) -set(OCOS_ENABLE_OPENCV_CODECS ON CACHE INTERNAL "" FORCE) set(OCOS_ENABLE_DLIB ON CACHE INTERNAL "" FORCE) set(OCOS_ENABLE_MATH ON CACHE INTERNAL "" FORCE) set(OCOS_ENABLE_AUDIO ON CACHE INTERNAL "" FORCE) diff --git a/include/ortx_utils.h b/include/ortx_utils.h index 0e6b951d7..e533650b8 100644 --- a/include/ortx_utils.h +++ b/include/ortx_utils.h @@ -105,6 +105,17 @@ extError_t ORTX_API_CALL OrtxTensorResultGetAt(OrtxTensorResult* result, size_t */ extError_t ORTX_API_CALL OrtxGetTensorType(OrtxTensor* tensor, extDataType_t* type); +/** + * @brief Retrieves the size of each element in the given tensor. + * + * This function calculates the size of each element in the specified tensor and stores it in the provided size variable. + * + * @param tensor A pointer to the OrtxTensor object. + * @param size A pointer to a size_t variable to store the size of each element. + * @return An extError_t value indicating the success or failure of the operation. 
+ */
+extError_t ORTX_API_CALL OrtxGetTensorSizeOfElement(OrtxTensor* tensor, size_t* size);
+
 /** \brief Get the data from the tensor
  *
  * \param tensor The tensor object
diff --git a/onnxruntime_extensions/pp_api.py b/onnxruntime_extensions/pp_api.py
index f30b742fd..4e3c05595 100644
--- a/onnxruntime_extensions/pp_api.py
+++ b/onnxruntime_extensions/pp_api.py
@@ -65,8 +65,16 @@ def __init__(self, processor_json):
         self.processor = create_processor(processor_json)
 
     def pre_process(self, images):
+        if isinstance(images, str):
+            images = [images]
+        if isinstance(images, list):
+            images = load_images(images)
         return image_pre_process(self.processor, images)
 
+    @staticmethod
+    def to_numpy(result):
+        return tensor_result_get_at(result, 0)
+
     def __del__(self):
         if delete_object and self.processor:
             delete_object(self.processor)
diff --git a/pyop/py_c_api.cc b/pyop/py_c_api.cc
index d1854072b..c2f57b561 100644
--- a/pyop/py_c_api.cc
+++ b/pyop/py_c_api.cc
@@ -85,15 +85,12 @@ void AddGlobalMethodsCApi(pybind11::module& m) {
         const int64_t* shape{};
         size_t num_dims;
         const void* data{};
-        size_t elem_size = 0;
-        if (tensor_type == extDataType_t::kOrtxInt64 || tensor_type == extDataType_t::kOrtxFloat) {
+        size_t elem_size = 1;
+        if (tensor_type == extDataType_t::kOrtxInt64 ||
+            tensor_type == extDataType_t::kOrtxFloat ||
+            tensor_type == extDataType_t::kOrtxUint8) {
           OrtxGetTensorData(tensor, reinterpret_cast<const void**>(&data), &shape, &num_dims);
-          elem_size = 4;
-          if (tensor_type == extDataType_t::kOrtxInt64) {
-            elem_size = 8;
-          }
-        } else if (tensor_type == extDataType_t::kOrtxUnknownType) {
-          throw std::runtime_error("Failed to get tensor type");
+          OrtxGetTensorSizeOfElement(tensor, &elem_size);
         } else if (tensor_type == extDataType_t::kOrtxUnknownType) {
           throw std::runtime_error("unsupported tensor type");
         }
@@ -108,6 +105,8 @@ void AddGlobalMethodsCApi(pybind11::module& m) {
           obj = py::array_t<float>(npy_dims);
         } else if (tensor_type == extDataType_t::kOrtxInt64) {
           obj = py::array_t<int64_t>(npy_dims);
+        } else if (tensor_type == extDataType_t::kOrtxUint8) {
+          obj = py::array_t<uint8_t>(npy_dims);
         }
         void* out_ptr = obj.mutable_data();
diff --git a/shared/api/c_api_utils.cc b/shared/api/c_api_utils.cc
index 9db7b1bde..feebe4448 100644
--- a/shared/api/c_api_utils.cc
+++ b/shared/api/c_api_utils.cc
@@ -103,7 +103,6 @@ extError_t ORTX_API_CALL OrtxTensorResultGetAt(OrtxTensorResult* result, size_t
   auto tensor_ptr = std::make_unique<TensorObject>();
   tensor_ptr->SetTensor(ts);
-  tensor_ptr->SetTensorType(result_ptr->GetTensorType(index));
   *tensor = static_cast<OrtxTensor*>(tensor_ptr.release());
   return extError_t();
 }
@@ -124,6 +123,24 @@ extError_t ORTX_API_CALL OrtxGetTensorType(OrtxTensor* tensor, extDataType_t* ty
   return extError_t();
 }
 
+extError_t ORTX_API_CALL OrtxGetTensorSizeOfElement(OrtxTensor* tensor, size_t* size) {
+  if (tensor == nullptr || size == nullptr) {
+    ReturnableStatus::last_error_message_ = "Invalid argument";
+    return kOrtxErrorInvalidArgument;
+  }
+
+  auto tensor_impl = static_cast<TensorObject*>(tensor);
+  if (tensor_impl->ortx_kind() != extObjectKind_t::kOrtxKindTensor) {
+    ReturnableStatus::last_error_message_ = "Invalid argument";
+    return kOrtxErrorInvalidArgument;
+  }
+
+  auto tb = tensor_impl->GetTensor();
+  assert(tb != nullptr);
+  *size = tb->SizeInBytes() / tb->NumberOfElement();
+  return extError_t();
+}
+
 extError_t ORTX_API_CALL OrtxGetTensorData(OrtxTensor* tensor, const void** data, const int64_t** shape,
                                            size_t* num_dims) {
   if (tensor == nullptr) {
@@ -158,3 +175,11 @@ extError_t ORTX_API_CALL OrtxGetTensorDataFloat(OrtxTensor* tensor, const float*
   *data = reinterpret_cast<const float*>(data_ptr);  // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
   return err;
 }
+
+extError_t ORTX_API_CALL OrtxGetTensorDataUint8(OrtxTensor* tensor, const uint8_t** data, const int64_t** shape,
+                                                size_t* num_dims) {
+  const void* data_ptr{};
+  auto err = OrtxGetTensorData(tensor, &data_ptr, shape, num_dims);
+  *data = reinterpret_cast<const uint8_t*>(data_ptr);  // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
+  return err;
+}
diff --git a/shared/api/c_api_utils.hpp b/shared/api/c_api_utils.hpp
index 46bd79ab3..37e749d1b 100644
--- a/shared/api/c_api_utils.hpp
+++ b/shared/api/c_api_utils.hpp
@@ -99,15 +99,56 @@ class TensorObject : public OrtxObjectImpl {
   ~TensorObject() override = default;
 
   void SetTensor(ortc::TensorBase* tensor) { tensor_ = tensor; }
-  void SetTensorType(extDataType_t type) { tensor_type_ = type; }
 
-  [[nodiscard]] extDataType_t GetTensorType() const { return tensor_type_; }
+  static extDataType_t GetDataType(ONNXTensorElementDataType dt) {
+    if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
+      return extDataType_t::kOrtxFloat;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) {
+      return extDataType_t::kOrtxUint8;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) {
+      return extDataType_t::kOrtxInt8;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16) {
+      return extDataType_t::kOrtxUint16;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16) {
+      return extDataType_t::kOrtxInt16;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
+      return extDataType_t::kOrtxInt32;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
+      return extDataType_t::kOrtxInt64;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) {
+      return extDataType_t::kOrtxString;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL) {
+      return extDataType_t::kOrtxBool;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) {
+      return extDataType_t::kOrtxFloat16;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
+      return extDataType_t::kOrtxDouble;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32) {
+      return extDataType_t::kOrtxUint32;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64) {
+      return extDataType_t::kOrtxUint64;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64) {
+      return extDataType_t::kOrtxComplex64;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128) {
+      return extDataType_t::kOrtxComplex128;
+    } else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16) {
+      return extDataType_t::kOrtxBFloat16;
+    } else {
+      return extDataType_t::kOrtxUnknownType;
+    }
+  }
+
+  [[nodiscard]] extDataType_t GetTensorType() const {
+    if (tensor_ == nullptr) {
+      return extDataType_t::kOrtxUnknownType;
+    }
+    return GetDataType(tensor_->Type());
+  }
 
   [[nodiscard]] ortc::TensorBase* GetTensor() const { return tensor_; }
 
  private:
   ortc::TensorBase* tensor_{};
-  extDataType_t tensor_type_{extDataType_t::kOrtxUnknownType};
 };
 
 class TensorResult : public OrtxObjectImpl {
@@ -116,13 +157,8 @@ class TensorResult : public OrtxObjectImpl {
   ~TensorResult() override = default;
 
   void SetTensors(std::vector<std::unique_ptr<ortc::TensorBase>>&& tensors) { tensors_ = std::move(tensors); }
-  void SetTensorTypes(const std::vector<extDataType_t>& types) { tensor_types_ = types; }
   [[nodiscard]] size_t NumTensors() const { return tensors_.size(); }
-
-  [[nodiscard]] const std::vector<extDataType_t>& tensor_types() const { return tensor_types_; }
-
   [[nodiscard]] const std::vector<std::unique_ptr<ortc::TensorBase>>& tensors() const { return tensors_; }
-
   [[nodiscard]] std::vector<ortc::TensorBase*> GetTensors() const {
     std::vector<ortc::TensorBase*> ts;
     ts.reserve(tensors_.size());
@@ -139,16 +175,8 @@ class TensorResult : public OrtxObjectImpl {
     return nullptr;
   }
 
-  extDataType_t GetTensorType(size_t i) const {
-    if (i < tensor_types_.size()) {
-      return tensor_types_[i];
-    }
-    return extDataType_t::kOrtxUnknownType;
-  }
-
  private:
   std::vector<std::unique_ptr<ortc::TensorBase>> tensors_;
-  std::vector<extDataType_t> tensor_types_;
 };
 
 struct ReturnableStatus {
diff --git a/shared/api/image_decoder.hpp b/shared/api/image_decoder.hpp
new file mode 100644
index 000000000..bc588e539
--- /dev/null
+++ b/shared/api/image_decoder.hpp
@@ -0,0 +1,145 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <cstring>
+
+#include "png.h"
+#include "jpeglib.h"
+#include "op_def_struct.h"
+#include "ext_status.h"
+
+class JMemorySourceManager : public jpeg_source_mgr {
+ public:
+  // Constructor
+  JMemorySourceManager(const uint8_t* encoded_image_data, const int64_t encoded_image_data_len) {
+    // Initialize source fields
+    next_input_byte = reinterpret_cast<const JOCTET*>(encoded_image_data);
+    bytes_in_buffer = static_cast<size_t>(encoded_image_data_len);
+    init_source = &JMemorySourceManager::initSource;
+    fill_input_buffer = &JMemorySourceManager::fillInputBuffer;
+    skip_input_data = &JMemorySourceManager::skipInputData;
+    resync_to_restart = jpeg_resync_to_restart;
+    term_source = &JMemorySourceManager::termSource;
+  }
+
+  // Initialize source (no-op)
+  static void initSource(j_decompress_ptr cinfo) {
+    // No initialization needed
+  }
+
+  // Fill input buffer (not used here, always return FALSE)
+  static boolean fillInputBuffer(j_decompress_ptr cinfo) {
+    return FALSE;  // Buffer is managed manually
+  }
+
+  // Skip input data
+  static void skipInputData(j_decompress_ptr cinfo, long num_bytes) {
+    JMemorySourceManager* srcMgr = reinterpret_cast<JMemorySourceManager*>(cinfo->src);
+    if (num_bytes > 0) {
+      size_t bytes_to_skip = static_cast<size_t>(num_bytes);
+      while (bytes_to_skip > srcMgr->bytes_in_buffer) {
+        bytes_to_skip -= srcMgr->bytes_in_buffer;
+        if (srcMgr->fillInputBuffer(cinfo)) {
+          // Error: buffer ran out
+          srcMgr->extError = kOrtxErrorCorruptData;
+        }
+      }
+      srcMgr->next_input_byte += bytes_to_skip;
+      srcMgr->bytes_in_buffer -= bytes_to_skip;
+    }
+  }
+
+  // Terminate source (no-op)
+  static void termSource(j_decompress_ptr cinfo) {
+    // No cleanup needed
+  }
+
+  extError_t extError{kOrtxOK};  // Error handler
+};
+
+inline OrtxStatus image_decoder(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
+  const auto& dimensions = input.Shape();
+  if (dimensions.size() != 1ULL) {
+    return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
+  }
+
+  // Get data & the length
+  const uint8_t* encoded_image_data = input.Data();
+  const int64_t encoded_image_data_len = input.NumberOfElement();
+
+  // check it's a PNG image or JPEG image
+  if (encoded_image_data_len < 8) {
+    return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Invalid image data."};
+  }
+
+  OrtxStatus status{};
+  if (png_sig_cmp(encoded_image_data, 0, 8) == 0) {
+    // Decode the PNG image
+    png_image image;
+    std::memset(&image, 0, sizeof(image));  // Use std::memset for clarity
+    image.version = PNG_IMAGE_VERSION;
+
+    if (png_image_begin_read_from_memory(&image, encoded_image_data, static_cast<size_t>(encoded_image_data_len)) ==
+        0) {
+      return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to read PNG image."};
+    }
+
+    image.format = PNG_FORMAT_RGB;  // Ensure you have the appropriate format
+    const int height = image.height;
+    const int width = image.width;
+    const int channels =
+        PNG_IMAGE_PIXEL_CHANNELS(image.format);  // Calculates the number of channels based on format
+
+    std::vector<int64_t> output_dimensions{height, width, channels};
+
+    uint8_t* decoded_image_data = output.Allocate(output_dimensions);
+    if (decoded_image_data == nullptr) {
+      return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to allocate memory for decoded image data."};
+    }
+
+    if (png_image_finish_read(&image, nullptr, decoded_image_data, 0, nullptr) == 0) {
+      return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to decode PNG image."};
+    }
+  } else {
+    // Initialize JPEG decompression object
+    jpeg_decompress_struct cinfo;
+    jpeg_error_mgr jerr;
+    cinfo.err = jpeg_std_error(&jerr);
+    jpeg_create_decompress(&cinfo);
+
+    // Set up the custom memory source manager
+    JMemorySourceManager srcManager(encoded_image_data, encoded_image_data_len);
+    cinfo.src = &srcManager;
+
+    // Read the JPEG header to get image info
+    jpeg_read_header(&cinfo, TRUE);
+
+    // Start decompression
+    jpeg_start_decompress(&cinfo);
+
+    // Allocate memory for the image
+    std::vector<int64_t> output_dimensions{cinfo.output_height, cinfo.output_width, cinfo.output_components};
+    uint8_t* imageBuffer = output.Allocate(output_dimensions);
+
+    // Read the image data
+    int row_stride = cinfo.output_width * cinfo.output_components;
+    while (cinfo.output_scanline < cinfo.output_height) {
+      uint8_t* row_ptr = imageBuffer + (cinfo.output_scanline * row_stride);
+      jpeg_read_scanlines(&cinfo, &row_ptr, 1);
+      if (srcManager.extError != kOrtxOK) {
+        break;
+      }
+    }
+
+    if (srcManager.extError != kOrtxOK) {
+      status = {srcManager.extError, "[ImageDecoder]: Failed to decode JPEG image."};
+    }
+
+    // Finish decompression
+    jpeg_finish_decompress(&cinfo);
+    jpeg_destroy_decompress(&cinfo);
+  }
+
+  return status;
+}
diff --git a/shared/api/image_processor.cc b/shared/api/image_processor.cc
index 833cc236d..8fe9dc0bd 100644
--- a/shared/api/image_processor.cc
+++ b/shared/api/image_processor.cc
@@ -8,7 +8,7 @@
 #include "image_processor.h"
 #include "c_api_utils.hpp"
 
-#include "cv2/imgcodecs/imdecode.hpp"
+#include "image_decoder.hpp"
 #include "image_transforms.hpp"
 #include "image_transforms_phi_3.hpp"
 
@@ -179,7 +179,7 @@ OrtxStatus ImageProcessor::PreProcess(ort_extensions::span<ImageRawData> image_d
   operations_.back()->ResetTensors(allocator_);
   if (status.IsOk()) {
     r.SetTensors(std::move(img_result));
-    r.SetTensorTypes({kOrtxFloat, kOrtxInt64, kOrtxInt64});
+    // r.SetTensorTypes({kOrtxFloat, kOrtxInt64, kOrtxInt64});
   }
 
   return status;
diff --git a/test/data/processor/image_to_numpy.py b/test/data/processor/image_to_numpy.py
new file mode 100644
index 000000000..9e14940da
--- /dev/null
+++ b/test/data/processor/image_to_numpy.py
@@ -0,0 +1,41 @@
+import os
+import tempfile
+from PIL import Image
+
+from onnxruntime_extensions.pp_api import ImageProcessor
+
+img_proc = ImageProcessor(R"""
+{
+  "processor": {
+    "name": "image_processing",
+    "transforms": [
+      {
+        "operation": {
+          "name": "decode_image",
+          "type": "DecodeImage",
+          "attrs": {
+            "color_space": "BGR"
+          }
+        }
+      },
+      {
+        "operation": {
+          "name": "convert_to_rgb",
+          "type": "ConvertRGB"
+        }
+      }
+    ]
+  }
+}""")
+
+img_name = "australia.jpg"
+result = img_proc.pre_process(os.path.dirname(__file__) + "/" + img_name)
+np_img = img_proc.to_numpy(result)
+print(np_img.shape, np_img.dtype)
+
+# can save the image back to disk
+img_rgb = np_img[0]
+img_bgr = img_rgb[..., ::-1]
+output_name = tempfile.gettempdir() + "/" + img_name
+Image.fromarray(img_bgr).save(output_name)
+print(output_name)
diff --git
a/test/data/processor/proctest.py b/test/data/processor/proctest.py new file mode 100644 index 000000000..9c807dc55 --- /dev/null +++ b/test/data/processor/proctest.py @@ -0,0 +1,76 @@ +import os +import tempfile +from PIL import Image +from transformers import AutoProcessor +from onnxruntime_extensions.pp_api import create_processor, load_images, image_pre_process, tensor_result_get_at + +import numpy as np + + +def regen_image(arr): + mean = np.array([0.48145466, 0.4578275, 0.40821073]) + std = np.array([0.26862954, 0.26130258, 0.27577711]) + + # Reverse normalization + array = arr * std + mean + + # Clip the values to [0, 1] range + array = np.clip(array, 0, 1) + + # Convert to [0, 255] range and uint8 type + array = (array * 255).astype(np.uint8) + + # Convert NumPy array to PIL Image + image = Image.fromarray(array) + return image + + +test_image = "test/data/processor/passport.png" +# test_image = "/temp/passport_s.png" +# test_image = "/temp/passport_s2.png" +model_id = "microsoft/Phi-3-vision-128k-instruct" + +processor = create_processor("test/data/processor/phi_3_image.json") +images = load_images([test_image]) +c_out = image_pre_process(processor, images) +# print(tensor_result_get_at(c_out, 0)) +# print(tensor_result_get_at(c_out, 1)) + +image = Image.open(test_image) +processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) +messages = [ + {"role": "user", "content": "<|image_1|>\nWhat is shown in this image?"}, + {"role": "assistant", "content": "The chart displays the percentage of respondents who agree with various statements about their preparedness for meetings. It shows five categories: 'Having clear and pre-defined goals for meetings', 'Knowing where to find the information I need for a meeting', 'Understanding my exact role and responsibilities when I'm invited', 'Having tools to manage admin tasks like note-taking or summarization', and 'Having more focus time to sufficiently prepare for meetings'. Each category has an associated bar indicating the level of agreement, measured on a scale from 0% to 100%."}, + {"role": "user", "content": "Provide insightful questions to spark discussion."} +] +prompt = processor.tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True) + + +inputs = processor(prompt, [image], return_tensors="pt") +# print(inputs["pixel_values"].numpy()) +# print(inputs["image_sizes"]) + +np.testing.assert_allclose( + inputs["image_sizes"].numpy(), tensor_result_get_at(c_out, 1)) +# np.testing.assert_allclose(inputs["pixel_values"].numpy(), tensor_result_get_at(c_out, 0), rtol=1e-1) + +if os.path.exists("/temp"): + temp_dir = "/temp" +else: + temp_dir = tempfile.mkdtemp() + print(f"Created temp dir: {temp_dir}") + +for i in range(17): + expected = inputs["pixel_values"].numpy()[0, i] + actual = tensor_result_get_at(c_out, 0)[0, i] + e_image = regen_image(expected.transpose(1, 2, 0)) + a_image = regen_image(actual.transpose(1, 2, 0)) + e_image.save(f"{temp_dir}/e_{i}.png") + a_image.save(f"{temp_dir}/a_{i}.png") + + try: + np.testing.assert_allclose(inputs["pixel_values"].numpy( + )[0, i], tensor_result_get_at(c_out, 0)[0, i], rtol=1e-2) + except AssertionError as e: + print(str(e)) diff --git a/test/pp_api_test/test_imgcodec.cc b/test/pp_api_test/test_imgcodec.cc new file mode 100644 index 000000000..87450cd7d --- /dev/null +++ b/test/pp_api_test/test_imgcodec.cc @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "shared/api/c_api_utils.hpp"
+#include "shared/api/image_decoder.hpp"
+
+using namespace ort_extensions;
+
+TEST(ImgDecoderTest, TestPngDecoder) {
+  std::vector<uint8_t> png_data;
+  std::filesystem::path png_path = "data/processor/exceltable.png";
+  std::ifstream png_file(png_path, std::ios::binary);
+  ASSERT_TRUE(png_file.is_open());
+  png_file.seekg(0, std::ios::end);
+  png_data.resize(png_file.tellg());
+  png_file.seekg(0, std::ios::beg);
+  png_file.read(reinterpret_cast<char*>(png_data.data()), png_data.size());
+  png_file.close();
+
+  ortc::Tensor<uint8_t> png_tensor({static_cast<int64_t>(png_data.size())}, png_data.data());
+  ortc::Tensor<uint8_t> out_tensor{&CppAllocator::Instance()};
+  auto status = image_decoder(png_tensor, out_tensor);
+  ASSERT_TRUE(status.IsOk());
+
+  ASSERT_EQ(out_tensor.Shape(), std::vector<int64_t>({206, 487, 3}));
+  auto out_range = out_tensor.Data() + 0;
+  ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
+            std::vector<uint8_t>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
+
+  out_range = out_tensor.Data() + 477 * 3;
+  ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
+            std::vector<uint8_t>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
+
+  out_range = out_tensor.Data() + 243 * 206 * 3;
+  ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
+            std::vector<uint8_t>({217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217}));
+
+  out_range = out_tensor.Data() + 485 * 206 * 3;
+  ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
+            std::vector<uint8_t>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
+}
+
+TEST(ImageDecoderTest, TestJpegDecoder) {
+  std::vector<uint8_t> jpeg_data;
+  std::filesystem::path jpeg_path = "data/processor/australia.jpg";
+  std::ifstream jpeg_file(jpeg_path, std::ios::binary);
+  ASSERT_TRUE(jpeg_file.is_open());
+  jpeg_file.seekg(0, std::ios::end);
+  jpeg_data.resize(jpeg_file.tellg());
+  jpeg_file.seekg(0, std::ios::beg);
+  jpeg_file.read(reinterpret_cast<char*>(jpeg_data.data()), jpeg_data.size());
+  jpeg_file.close();
+
+  ortc::Tensor<uint8_t> jpeg_tensor({static_cast<int64_t>(jpeg_data.size())}, jpeg_data.data());
+  ortc::Tensor<uint8_t> out_tensor{&CppAllocator::Instance()};
+  auto status = image_decoder(jpeg_tensor, out_tensor);
+  ASSERT_TRUE(status.IsOk());
+
+  ASSERT_EQ(out_tensor.Shape(), std::vector<int64_t>({876, 1300, 3}));
+  auto out_range = out_tensor.Data() + 0;
+  ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
+            std::vector<uint8_t>({48, 14, 5, 48, 14, 5, 48, 14, 5, 48, 14, 5}));
+
+  out_range = out_tensor.Data() + 1296 * 3;
+  ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
+            std::vector<uint8_t>({221, 237, 224, 225, 236, 219, 218, 222, 199, 203, 202, 174}));
+
+  out_range = out_tensor.Data() + 438 * 1300 * 3;
+  ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
+            std::vector<uint8_t>({84, 68, 55, 86, 70, 55, 92, 77, 58, 101, 86, 65}));
+
+  out_range = out_tensor.Data() + 875 * 1300 * 3 + 1296 * 3;
+  ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
+            std::vector<uint8_t>({208, 210, 197, 204, 206, 193, 198, 200, 187, 194, 196, 183}));
+}
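
Editorial note: the sketch below is not part of the patch. It shows how a caller might combine the new OrtxGetTensorSizeOfElement with the OrtxGetTensorType and OrtxGetTensorData calls declared in include/ortx_utils.h, replacing the hard-coded elem_size = 4/8 logic this change removes from pyop/py_c_api.cc. The helper name TensorByteSize is illustrative only, and kOrtxOK is assumed to be the library's success code.

#include <cstddef>
#include <cstdint>

#include "ortx_utils.h"

// Returns the payload size of `tensor` in bytes, or 0 if any query fails.
inline size_t TensorByteSize(OrtxTensor* tensor) {
  extDataType_t type = extDataType_t::kOrtxUnknownType;
  if (OrtxGetTensorType(tensor, &type) != kOrtxOK || type == extDataType_t::kOrtxUnknownType) {
    return 0;
  }

  // Element width now comes from the API instead of a per-type switch.
  size_t elem_size = 0;
  if (OrtxGetTensorSizeOfElement(tensor, &elem_size) != kOrtxOK) {
    return 0;
  }

  const void* data{};
  const int64_t* shape{};
  size_t num_dims = 0;
  if (OrtxGetTensorData(tensor, &data, &shape, &num_dims) != kOrtxOK) {
    return 0;
  }

  // Multiply the dimensions to get the element count, then scale by element width.
  size_t num_elems = 1;
  for (size_t i = 0; i < num_dims; ++i) {
    num_elems *= static_cast<size_t>(shape[i]);
  }
  return num_elems * elem_size;
}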