Skip to content

Commit

Permalink
Implement arch_parser.c as pure dll (#3230)
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev authored Jan 23, 2025
1 parent b287705 commit d7d55b8
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 35 deletions.
36 changes: 8 additions & 28 deletions third_party/intel/backend/arch_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,17 @@

#include <sycl/sycl.hpp>

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <Python.h>
#include <numpy/arrayobject.h>

static PyObject *parseDeviceArch(PyObject *self, PyObject *args) {
uint64_t dev_arch;
assert(PyArg_ParseTuple(args, "K", &dev_arch) && "Expected an integer");
#if defined(_WIN32)
#define EXPORT_FUNC __declspec(dllexport)
#else
#define EXPORT_FUNC __attribute__((visibility("default")))
#endif

extern "C" EXPORT_FUNC const char *parse_device_arch(uint64_t dev_arch) {
sycl::ext::oneapi::experimental::architecture sycl_arch =
static_cast<sycl::ext::oneapi::experimental::architecture>(dev_arch);
// FIXME: Add support for more architectures.
std::string arch = "";
const char *arch = "";
switch (sycl_arch) {
case sycl::ext::oneapi::experimental::architecture::intel_gpu_pvc:
arch = "pvc";
Expand All @@ -39,24 +38,5 @@ static PyObject *parseDeviceArch(PyObject *self, PyObject *args) {
std::cerr << "sycl_arch not recognized: " << (int)sycl_arch << std::endl;
}

return Py_BuildValue("s", arch.c_str());
}

static PyMethodDef ModuleMethods[] = {
{"parse_device_arch", parseDeviceArch, METH_VARARGS,
"parse device architecture"},
{NULL, NULL, 0, NULL} // sentinel
};

static struct PyModuleDef ModuleDef = {PyModuleDef_HEAD_INIT, "arch_utils",
NULL, // documentation
-1, // size
ModuleMethods};

PyMODINIT_FUNC PyInit_arch_utils(void) {
if (PyObject *m = PyModule_Create(&ModuleDef)) {
PyModule_AddFunctions(m, ModuleMethods);
return m;
}
return NULL;
return arch;
}
13 changes: 6 additions & 7 deletions third_party/intel/backend/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def __init__(self, target: tuple) -> None:
raise TypeError("target.arch is not a dict")
dirname = os.path.dirname(os.path.realpath(__file__))
mod = compile_module_from_src(Path(os.path.join(dirname, "arch_parser.c")).read_text(), "arch_utils")
self.parse_device_arch = mod.parse_device_arch
self.device_arch = mod.parse_device_arch(target.arch.get('architecture', 0))
self.properties = self.parse_target(target.arch)
self.binary_ext = "spv"

Expand All @@ -155,13 +155,12 @@ def parse_target(self, tgt_prop) -> dict:
dev_prop['has_subgroup_2d_block_io'] = tgt_prop.get('has_subgroup_2d_block_io', False)
dev_prop['has_bfloat16_conversions'] = tgt_prop.get('has_bfloat16_conversions', True)

device_arch = self.parse_device_arch(tgt_prop.get('architecture', 0))
if device_arch and shutil.which('ocloc'):
if device_arch in self.device_props:
dev_prop.update(self.device_props[device_arch])
if self.device_arch and shutil.which('ocloc'):
if self.device_arch in self.device_props:
dev_prop.update(self.device_props[self.device_arch])
return dev_prop
try:
ocloc_cmd = ['ocloc', 'query', 'CL_DEVICE_EXTENSIONS', '-device', device_arch]
ocloc_cmd = ['ocloc', 'query', 'CL_DEVICE_EXTENSIONS', '-device', self.device_arch]
with tempfile.TemporaryDirectory() as temp_dir:
output = subprocess.check_output(ocloc_cmd, text=True, cwd=temp_dir)
supported_extensions = set()
Expand All @@ -174,7 +173,7 @@ def parse_target(self, tgt_prop) -> dict:
'has_subgroup_matrix_multiply_accumulate_tensor_float32'] = 'cl_intel_subgroup_matrix_multiply_accumulate_tensor_float32' in supported_extensions
ocloc_dev_prop['has_subgroup_2d_block_io'] = 'cl_intel_subgroup_2d_block_io' in supported_extensions
ocloc_dev_prop['has_bfloat16_conversions'] = 'cl_intel_bfloat16_conversions' in supported_extensions
self.device_props[device_arch] = ocloc_dev_prop
self.device_props[self.device_arch] = ocloc_dev_prop
dev_prop.update(ocloc_dev_prop)
except subprocess.CalledProcessError:
# Note: LTS driver does not support ocloc query CL_DEVICE_EXTENSIONS.
Expand Down
38 changes: 38 additions & 0 deletions third_party/intel/backend/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import hashlib
import shutil
import ctypes
import sysconfig
import tempfile
from pathlib import Path
Expand Down Expand Up @@ -134,6 +135,39 @@ def libsycl_dir(self) -> str:
COMPILATION_HELPER = CompilationHelper()


class ArchParser:
    """ctypes front-end for the compiled ``arch_utils`` shared library.

    Loads the library, declares the signature of its exported
    ``parse_device_arch`` function (one 64-bit unsigned integer in, a C
    string out) and unloads the library again when the instance is destroyed.
    """

    def __init__(self, cache_path: str):
        """Load the shared library stored at ``cache_path``.

        Raises whatever ``ctypes.CDLL`` raises when the file cannot be loaded.
        """
        self.shared_library = ctypes.CDLL(cache_path)
        self.shared_library.parse_device_arch.restype = ctypes.c_char_p
        self.shared_library.parse_device_arch.argtypes = (ctypes.c_uint64, )

    def parse_device_arch(self, dev_arch):
        """Call the native ``parse_device_arch`` and return its result as ``str``.

        This is a regular method on purpose: the bound method holds a reference
        to ``self``, so a caller that stores ``parser.parse_device_arch`` keeps
        the parser (and therefore the loaded library) alive. A free-standing
        closure around the foreign function would let ``__del__`` unload the
        library while the function pointer is still reachable.
        """
        return self.shared_library.parse_device_arch(dev_arch).decode("utf-8")

    def __del__(self):
        """Unload the shared library, if one was loaded."""
        # `shared_library` is absent when CDLL() raised inside __init__; popping
        # it also makes a repeated __del__ call a no-op instead of a double close.
        library = self.__dict__.pop("shared_library", None)
        if library is None:
            return

        handle = library._handle
        if os.name != 'nt':
            # dlclose is looked up through the loaded library itself.
            library.dlclose.argtypes = (ctypes.c_void_p, )
            library.dlclose(handle)
        else:
            # A module handle is pointer-sized, so c_void_p is correct on both
            # 32-bit and 64-bit Windows.
            free_library = ctypes.windll.kernel32.FreeLibrary
            free_library.argtypes = (ctypes.c_void_p, )
            free_library(handle)


def compile_module_from_src(src, name):
key = hashlib.sha256(src.encode("utf-8")).hexdigest()
cache = get_cache_manager(key)
Expand All @@ -155,6 +189,10 @@ def compile_module_from_src(src, name):
COMPILATION_HELPER.libraries, extra_compile_args=extra_compiler_args)
with open(so, "rb") as f:
cache_path = cache.put(f.read(), file_name, binary=True)

if name == 'arch_utils':
return ArchParser(cache_path)

import importlib.util
spec = importlib.util.spec_from_file_location(name, cache_path)
mod = importlib.util.module_from_spec(spec)
Expand Down

0 comments on commit d7d55b8

Please sign in to comment.