From a24cdd6b67a0e761474d2074d39ac73f4968caf7 Mon Sep 17 00:00:00 2001
From: Liangliang Ma
Date: Tue, 22 Oct 2024 22:45:05 +0800
Subject: [PATCH] [XPU] [DeepNVMe] use same cpu_op_desc_t with cuda (#6645)

We have found that #6592 uses `_pinned_tensor_mgr` to create the CPU bounce
buffer, which is the same as what our XPU accelerator is currently doing, so
there is no need for an XPU-specific cpu_op_desc_t. In this PR we:

1. Remove the custom csrc/xpu/aio/deepspeed_cpu_op.cpp.
2. Modify the XPU async_io op builder.

This cannot simply be done by reverting #6532, because we added some source
files when the GDS feature last went into DeepSpeed, so we are filing this new
PR instead. :) An illustrative sketch of the shared bounce-buffer read path
follows the diff.
---
 csrc/xpu/aio/deepspeed_cpu_op.cpp | 51 -------------------------------
 op_builder/xpu/async_io.py        |  2 +-
 2 files changed, 1 insertion(+), 52 deletions(-)
 delete mode 100644 csrc/xpu/aio/deepspeed_cpu_op.cpp

diff --git a/csrc/xpu/aio/deepspeed_cpu_op.cpp b/csrc/xpu/aio/deepspeed_cpu_op.cpp
deleted file mode 100644
index ee98c2d5cac2..000000000000
--- a/csrc/xpu/aio/deepspeed_cpu_op.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// SPDX-License-Identifier: Apache-2.0
-
-// DeepSpeed Team
-
-#include "deepspeed_cpu_op.h"
-
-using namespace std;
-
-cpu_op_desc_t::cpu_op_desc_t(const bool read_op,
-                             const torch::Tensor& buffer,
-                             const int fd,
-                             const char* filename,
-                             const long long int file_num_bytes,
-                             const int num_threads,
-                             const bool validate)
-    : io_op_desc_t(read_op, buffer, fd, filename, file_num_bytes, num_threads, validate),
-      _cpu_buffer(buffer)
-{
-    // XPU don't handle buffer here. See XPU Accelerator pin_memory.
-    _contiguous_buffer = _cpu_buffer.contiguous();
-}
-
-char* cpu_op_desc_t::data_ptr() const { return (char*)_contiguous_buffer.data_ptr(); }
-
-void cpu_op_desc_t::finish()
-{
-    if (_read_op && _buffer.is_xpu()) { _buffer.copy_(_cpu_buffer.to(torch::kXPU)); }
-}
-
-void cpu_op_desc_t::validate()
-{
-    validate_aio_operation(_read_op, _filename.c_str(), data_ptr(), _file_num_bytes);
-}
-
-void cpu_op_desc_t::run(const int tid,
-                        std::unique_ptr<aio_context>& aio_ctxt,
-                        deepspeed_aio_config_t* aio_config)
-{
-    assert(tid < _num_threads);
-    const auto base_offset = _num_bytes_per_thread * tid;
-
-    std::unique_ptr<io_xfer_ctxt> xfer_ctxt(
-        new io_xfer_ctxt(_fd, base_offset, _num_bytes_per_thread, data_ptr()));
-
-    if (aio_config->_overlap_events) {
-        do_aio_operation_overlap(_read_op, aio_ctxt, xfer_ctxt, aio_config, nullptr);
-    } else {
-        do_aio_operation_sequential(_read_op, aio_ctxt, xfer_ctxt, aio_config, nullptr);
-    }
-}
diff --git a/op_builder/xpu/async_io.py b/op_builder/xpu/async_io.py
index 6a6798eaeb9c..2da963ae64aa 100644
--- a/op_builder/xpu/async_io.py
+++ b/op_builder/xpu/async_io.py
@@ -31,7 +31,7 @@ def sources(self):
         'csrc/aio/common/deepspeed_aio_types.cpp',
         'csrc/aio/py_lib/deepspeed_pin_tensor.cpp',
         'csrc/aio/py_lib/deepspeed_py_io_handle.cpp',
-        'csrc/xpu/aio/deepspeed_cpu_op.cpp',
+        'csrc/aio/py_lib/deepspeed_cpu_op.cpp',
         'csrc/aio/py_lib/deepspeed_aio_op_desc.cpp',
     ]
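
Illustrative sketch (not part of the patch): the commit message's point is that the generic cpu_op_desc_t already implements the CPU bounce-buffer flow the deleted XPU file duplicated. The sketch below shows that flow in minimal form under stated assumptions; it is not DeepSpeed code. The function name read_via_bounce_buffer, the plain torch::empty / std::ifstream staging, and the uint8 layout are invented for the example; the real implementation takes its staging tensor from _pinned_tensor_mgr (backed by the accelerator's pin_memory) and performs the file I/O on the asynchronous I/O worker path visible in the deleted file above.

#include <torch/torch.h>

#include <cstdint>
#include <fstream>
#include <string>

// Read num_bytes from filename into device_buffer through a CPU staging
// ("bounce") buffer. Assumes device_buffer is a kUInt8 tensor with num_bytes
// elements on the target device.
void read_via_bounce_buffer(const std::string& filename,
                            torch::Tensor& device_buffer,
                            const int64_t num_bytes)
{
    // 1. Allocate a host-side staging tensor. DeepSpeed requests this from its
    //    pinned-tensor manager so the host-to-device copy below is fast.
    torch::Tensor bounce = torch::empty({num_bytes}, torch::dtype(torch::kUInt8));

    // 2. Fill the staging tensor from disk. The real op descriptor does this on
    //    async I/O worker threads, each covering its per-thread byte range.
    std::ifstream file(filename, std::ios::binary);
    file.read(static_cast<char*>(bounce.data_ptr()), num_bytes);

    // 3. Copy the staged bytes into the device tensor; copy_() issues the
    //    host-to-device transfer for whichever backend device_buffer lives on.
    device_buffer.copy_(bounce);
}

The only accelerator-specific piece of this flow is how the staging tensor gets pinned, and that is already hidden behind the accelerator's pin_memory; this is the observation that lets the XPU build reuse csrc/aio/py_lib/deepspeed_cpu_op.cpp instead of carrying its own copy.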