Commit

Merge branch 'main' into yguo/fix-one-branch-yml
YUNQIUGUO authored Feb 5, 2024
2 parents 859f408 + d47a3dd commit 3e14cdf
Showing 16 changed files with 174 additions and 58 deletions.
2 changes: 1 addition & 1 deletion .clang-tidy
@@ -2,7 +2,7 @@
# turn off readability-braces-around-statements to allow single line statement like 'if (x == y) doSomething();'
Checks: '-*,cppcoreguidelines-*,google-*,readability-*,modernize-*,-readability-braces-around-statements,-google-runtime-references,-cppcoreguidelines-pro-type-reinterpret-cast'
WarningsAsErrors: ''
HeaderFilterRegex: '.*onnxruntime\/core\/.*'
HeaderFilterRegex: 'includes\/.*'
AnalyzeTemporaryDtors: false
FormatStyle: none
CheckOptions:
10 changes: 5 additions & 5 deletions .pipelines/ci.yml
@@ -558,7 +558,7 @@ stages:
name: 'onnxruntime-extensions-Linux-GPU-A10'
timeoutInMinutes: 120
variables:
ORT_VERSION: '1.16.2'
ORT_VERSION: '1.16.3'
TORCH_VERSION: 'torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118'
steps:
- task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
@@ -584,7 +584,7 @@ stages:
userRepository: 'microsoft/onnxruntime'
defaultVersionType: 'specificTag'
version: 'v$(ORT_VERSION)'
itemPattern: '*-linux-x64-$(ORT_VERSION)*'
itemPattern: '*-linux-x64-gpu-$(ORT_VERSION)*'
downloadPath: '$(Build.SourcesDirectory)'
displayName: Download the ONNXRuntime prebuilt package.

@@ -609,7 +609,7 @@ stages:
script: |
docker run --gpus all --rm \
--volume $(Build.SourcesDirectory):/onnxruntime-extensions \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-$(ORT_VERSION):/onnxruntime \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-gpu-$(ORT_VERSION):/onnxruntime \
-e CUDA_PATH=/usr/local/cuda-11.8 \
onnxruntime-extensionscuda11build \
/bin/bash -c "
@@ -626,7 +626,7 @@ stages:
script: |
docker run --gpus all --rm \
--volume $(Build.SourcesDirectory):/onnxruntime-extensions \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-$(ORT_VERSION):/onnxruntime \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-gpu-$(ORT_VERSION):/onnxruntime \
-e CUDA_PATH=/usr/local/cuda-11.8 \
onnxruntime-extensionscuda11build \
/bin/bash -c "
@@ -644,7 +644,7 @@ stages:
script: |
docker run --gpus all --rm \
--volume $(Build.SourcesDirectory):/onnxruntime-extensions \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-$(ORT_VERSION):/onnxruntime \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-gpu-$(ORT_VERSION):/onnxruntime \
-e CUDA_PATH=/usr/local/cuda-11.8 \
onnxruntime-extensionscuda11build \
/bin/bash -c "
1 change: 1 addition & 0 deletions onnxruntime_extensions/_extensions_pydll.pyi
@@ -23,6 +23,7 @@ class PyCustomOpDef:
dt_complex64: int = ...
dt_complex128: int = ...
dt_bfloat16: int = ...

def install_hooker(self, invocation_handler: Callable) -> None:
...
...
55 changes: 41 additions & 14 deletions onnxruntime_extensions/_hf_cvt.py
@@ -9,8 +9,6 @@

import json
import onnx
import uuid
import numpy as np
from numpy import array as nparray
from functools import partial
from collections import namedtuple, OrderedDict
@@ -31,7 +29,8 @@ def convert_bpe_vocab(hf_tokenizer):
# ids = sorted(hf_tokenizer.added_tokens_encoder.values())
# if not ids == list(range(min(ids), max(ids) + 1)):
# raise RuntimeError(f"{hf_tokenizer.__name__}: the ids in added_tokens_encoder are not consecutive")
token_map = [f"{_k}={_v}" for _k, _v in hf_tokenizer.added_tokens_encoder.items()]
token_map = [f"{_k}={_v}" for _k,
_v in hf_tokenizer.added_tokens_encoder.items()]
attrs.update({"added_token": "\n".join(token_map)})

sorted_merges = {v_: k_ for k_, v_ in hf_tokenizer.bpe_ranks.items()}
@@ -42,7 +41,8 @@ def convert_bpe_vocab(hf_tokenizer):
def bpe_tokenizer(self, **kwargs):
hf_gpt2_tokenizer = self.tokenizer
if type(self.tokenizer).__name__.endswith('Fast'):
raise ValueError('Please use the slow version of the tokenizer (ex: GPT2Tokenizer).')
raise ValueError(
'Please use the slow version of the tokenizer (ex: GPT2Tokenizer).')

attrs = self.convert_bpe_vocab(hf_gpt2_tokenizer)
attrs.update(**kwargs)
@@ -51,12 +51,15 @@ def bpe_tokenizer(self, **kwargs):
def bert_tokenizer(self, **kwargs):
hf_bert_tokenizer = self.tokenizer
# has to be sorted since the id of token was generated automatically.
ordered_vocab = OrderedDict(sorted(hf_bert_tokenizer.vocab.items(), key=lambda item: int(item[1])))
ordered_vocab = OrderedDict(
sorted(hf_bert_tokenizer.vocab.items(), key=lambda item: int(item[1])))
vocab = '\n'.join(ordered_vocab.keys())
attrs = dict(vocab=vocab)
init_kwargs = hf_bert_tokenizer.init_kwargs
attrs['do_lower_case'] = 1 if 'do_lower_case' in init_kwargs and init_kwargs.get('do_lower_case') else 0
attrs['strip_accents'] = 1 if 'strip_accents' in init_kwargs and init_kwargs.get('strip_accents') else 0
attrs['do_lower_case'] = 1 if 'do_lower_case' in init_kwargs and init_kwargs.get(
'do_lower_case') else 0
attrs['strip_accents'] = 1 if 'strip_accents' in init_kwargs and init_kwargs.get(
'strip_accents') else 0
attrs.update(**kwargs)
return attrs

@@ -91,7 +94,8 @@ def clip_tokenizer(self, **kwargs):
hf_clip_tokenizer = self.tokenizer

if type(self.tokenizer).__name__.endswith('Fast'):
raise ValueError('Please use the slow version of the tokenizer (ex: CLIPTokenizer).')
raise ValueError(
'Please use the slow version of the tokenizer (ex: CLIPTokenizer).')

attrs = self.convert_bpe_vocab(hf_clip_tokenizer)
attrs.update(**kwargs)
@@ -101,7 +105,8 @@ def roberta_tokenizer(self, **kwargs):
hf_roberta_tokenizer = self.tokenizer

if type(self.tokenizer).__name__.endswith('Fast'):
raise ValueError('Please use the slow version of the tokenizer (ex: RobertaTokenizer).')
raise ValueError(
'Please use the slow version of the tokenizer (ex: RobertaTokenizer).')

attrs = self.convert_bpe_vocab(hf_roberta_tokenizer)
attrs.update(**kwargs)
@@ -133,7 +138,7 @@ def spm_decoder(self, **kwargs):
"DistilBertTokenizer": TokenOpParam('BertTokenizer', HFTokenizerConverter.bert_tokenizer,
'BertDecoder', HFTokenizerConverter.bpe_decoder, None),
"GPT2Tokenizer": TokenOpParam('GPT2Tokenizer', HFTokenizerConverter.bpe_tokenizer,
'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
"CodeGenTokenizer": TokenOpParam('GPT2Tokenizer', HFTokenizerConverter.bpe_tokenizer,
'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
"CLIPTokenizer": TokenOpParam('CLIPTokenizer', HFTokenizerConverter.clip_tokenizer,
@@ -167,7 +172,8 @@ class HFTokenizerOnnxGraph:

@staticmethod
def extract_cls_name(processor):
cls_name = processor if isinstance(processor, str) else type(processor).__name__
cls_name = processor if isinstance(
processor, str) else type(processor).__name__
if cls_name.endswith("TokenizerFast"):
cls_name = cls_name[:-len("Fast")]
return cls_name
@@ -184,6 +190,8 @@ def __init__(self, processor, **kwargs):

def pre_processing(self, **kwargs):
with_default_inputs = kwargs.pop("WITH_DEFAULT_INPUTS", True)
cast_token_id = kwargs.pop("CAST_TOKEN_ID", False)

_cvt_op = self.cvt_quadruple.pre_op
_cvt_func = self.cvt_quadruple.pre_attribute_cvt
cvt = partial(_cvt_func, self.cvt_obj)
@@ -200,22 +208,41 @@ def pre_processing(self, **kwargs):
if self.cvt_quadruple.default_inputs is not None:
default_inputs.update(self.cvt_quadruple.default_inputs)
if len(default_inputs) != n_inputs:
raise ValueError("Op: {} does have the inputs from its TokenOpParam.".format(_cvt_op))
raise ValueError(
"Op: {} does not have the inputs from its TokenOpParam.".format(_cvt_op))

new_initializers = []

for k, v in default_inputs.items():
input_value_info = next((i for i in g.input if i.name == k), None)
if input_value_info is None:
raise ValueError("The input {} is not found in the graph".format(k))
raise ValueError(
"The input {} is not found in the graph".format(k))

np_dtype = onnx.helper.tensor_dtype_to_np_dtype(input_value_info.type.tensor_type.elem_type)
np_dtype = onnx.helper.tensor_dtype_to_np_dtype(
input_value_info.type.tensor_type.elem_type)
value = nparray(v, np_dtype)
new_initializers.append(onnx.numpy_helper.from_array(value, k))
g.initializer.extend(new_initializers)
new_inputs = [i for i in g.input if i.name not in default_inputs]
g.ClearField("input")
g.input.extend(new_inputs)

if cast_token_id:
# assume the first output is always the token ID.
if g.output[0].type.tensor_type.elem_type != onnx.onnx_pb.TensorProto.INT64:
new_output_name = g.output[0].name + '_cast'
shape = g.output[0].type.tensor_type.shape
cast_node = onnx.helper.make_node('Cast', [g.output[0].name], [new_output_name],
to=onnx.onnx_pb.TensorProto.INT64)
new_output = [onnx.helper.make_tensor_value_info(
new_output_name, onnx.onnx_pb.TensorProto.INT64, None)] + list(g.output)[1:]
if shape is not None:
new_output[0].type.tensor_type.shape.CopyFrom(shape)
g.node.append(cast_node)
g.ClearField('output')
g.output.extend(new_output)

return g

def post_processing(self, **kwargs):
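The new CAST_TOKEN_ID branch above routes the graph's first output through a Cast node so downstream consumers always receive int64 token IDs. As a standalone illustration of the same ONNX graph-surgery pattern (a minimal sketch with hypothetical names, not code from this commit):

    import onnx
    from onnx import TensorProto, helper

    def cast_first_output_to_int64(graph: onnx.GraphProto) -> None:
        # Route the first graph output through a Cast node so consumers see int64.
        old = graph.output[0]
        if old.type.tensor_type.elem_type == TensorProto.INT64:
            return
        cast_name = old.name + "_cast"
        graph.node.append(
            helper.make_node("Cast", [old.name], [cast_name], to=TensorProto.INT64))
        new_out = helper.make_tensor_value_info(cast_name, TensorProto.INT64, None)
        new_out.type.tensor_type.shape.CopyFrom(old.type.tensor_type.shape)
        remaining = list(graph.output)[1:]
        graph.ClearField("output")
        graph.output.extend([new_out] + remaining)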
23 changes: 16 additions & 7 deletions onnxruntime_extensions/cvt.py
@@ -41,6 +41,8 @@ def gen_processing_models(processor: Union[str, object],
has to be provided in the kwargs
pre_kwargs: dict
Keyword arguments for generating the pre-processing model
WITH_DEFAULT_INPUTS: bool, add default inputs to the graph, default is True
CAST_TOKEN_ID: bool, add a cast op to output token IDs to be int64 if needed, default is False
post_kwargs: dict
Keyword arguments for generating the post-processing model
opset: int
@@ -54,7 +56,8 @@
The pre- and post-processing ONNX models
"""
if pre_kwargs is None and post_kwargs is None:
raise ValueError("Either pre_kwargs or post_kwargs should be provided. None means no processing")
raise ValueError(
"Either pre_kwargs or post_kwargs should be provided. None means no processing graph output.")
if isinstance(processor, str):
g_pre, g_post = (None, None)
if pre_kwargs:
@@ -64,23 +67,29 @@
cls_name = processor
else:
if processor not in _PRE_POST_PAIR:
raise RuntimeError(f"Cannot locate the post processing operator name from {processor}")
raise RuntimeError(
f"Cannot locate the post processing operator name from {processor}")
cls_name = _PRE_POST_PAIR[processor]
g_post = SingleOpGraph.build_graph(cls_name, **post_kwargs)
return make_onnx_model(g_pre) if g_pre else None, make_onnx_model(g_post) if g_post else None

cls_name = type(processor).__name__
if cls_name == "WhisperProcessor":
if WhisperDataProcGraph is None:
raise ValueError("The Whisper processor needs torch.onnx support, please install pytorch 2.0 and above")
raise ValueError(
"The Whisper processor needs torch.onnx support, please install pytorch 2.0 and above")
_converter = WhisperDataProcGraph(processor, opset=opset, **kwargs)
pre_m = _converter.pre_processing(**pre_kwargs) if pre_kwargs is not None else None
post_m = _converter.post_processing(**post_kwargs) if post_kwargs is not None else None
pre_m = _converter.pre_processing(
**pre_kwargs) if pre_kwargs is not None else None
post_m = _converter.post_processing(
**post_kwargs) if post_kwargs is not None else None
return pre_m, post_m
elif HFTokenizerOnnxGraph.is_supported(processor):
_converter = HFTokenizerOnnxGraph(processor)
pre_g = _converter.pre_processing(**pre_kwargs) if pre_kwargs is not None else None
post_g = _converter.post_processing(**post_kwargs) if post_kwargs is not None else None
pre_g = _converter.pre_processing(
**pre_kwargs) if pre_kwargs is not None else None
post_g = _converter.post_processing(
**post_kwargs) if post_kwargs is not None else None
return make_onnx_model(pre_g) if pre_g else None, \
make_onnx_model(post_g) if post_g else None
else:
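The two pre-processing kwargs documented above can be exercised through gen_processing_models; a minimal usage sketch, assuming a slow Hugging Face tokenizer is available locally (the model name and output path are illustrative, not part of this commit):

    import onnx
    from transformers import GPT2Tokenizer
    from onnxruntime_extensions import gen_processing_models

    # A slow tokenizer, as required by the ValueError checks in _hf_cvt.py.
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    pre_model, _ = gen_processing_models(
        tokenizer,
        pre_kwargs={"WITH_DEFAULT_INPUTS": True, "CAST_TOKEN_ID": True})
    onnx.save(pre_model, "gpt2_tokenizer.onnx")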
18 changes: 12 additions & 6 deletions onnxruntime_extensions/util.py
@@ -112,11 +112,13 @@ def remove_unused_initializers(subgraph, top_level_initializers=None):
all_initializers = initializers + top_level_initializers

# Filter the initializers by checking if their names are in the list of used input tensors
used_initializers = [init for init in all_initializers if init.name in input_tensors]
used_initializers = [
init for init in all_initializers if init.name in input_tensors]

# Update the subgraph's initializers
del subgraph.initializer[:]
subgraph.initializer.extend([init for init in used_initializers if init in initializers])
subgraph.initializer.extend(
[init for init in used_initializers if init in initializers])

# Recursively process subgraphs within this subgraph
for node in nodes:
@@ -125,7 +127,8 @@ def remove_unused_initializers(subgraph, top_level_initializers=None):
remove_unused_initializers(attr.g, top_level_initializers)
elif attr.type == onnx.AttributeProto.GRAPHS:
for subgraph in attr.graphs:
remove_unused_initializers(subgraph, top_level_initializers)
remove_unused_initializers(
subgraph, top_level_initializers)


def quick_merge(*models, connection_indices=None):
@@ -150,12 +153,14 @@ def quick_merge(*models, connection_indices=None):
merged_graph = models[0].graph

# Dictionary to store unique opsets
opset_imports = {opset.domain if opset.domain else "ai.onnx": opset for opset in models[0].opset_import}
opset_imports = {
opset.domain if opset.domain else "ai.onnx": opset for opset in models[0].opset_import}

# Iterate over all other models and merge
for model_idx, model in enumerate(models[1:], start=1):
if connection_indices is None:
io_map = [(out.name, in_.name) for out, in_ in zip(models[model_idx - 1].graph.output, model.graph.input)]
io_map = [(out.name, in_.name) for out, in_ in zip(
models[model_idx - 1].graph.output, model.graph.input)]
else:
io_map = [(models[model_idx - 1].graph.output[out_idx].name, model.graph.input[in_idx].name)
for out_idx, in_idx in connection_indices[model_idx - 1]]
@@ -174,7 +179,8 @@

default_opset = opset_imports.pop("ai.onnx", None)
merged_model = onnx.helper.make_model_gen_version(merged_graph,
opset_imports=[default_opset],
opset_imports=[
default_opset],
producer_name='ONNX Model Merger')
merged_model.opset_import.extend(opset_imports.values())
return merged_model
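A hedged usage sketch for quick_merge above, chaining a tokenizer model into a core model by positional output-to-input matching (the file names are illustrative, not part of this commit):

    import onnx
    from onnxruntime_extensions.util import quick_merge

    pre = onnx.load("gpt2_tokenizer.onnx")   # e.g. the pre-processing model sketched earlier
    core = onnx.load("gpt2_core.onnx")
    # With connection_indices=None, the outputs of each model are zipped to the
    # inputs of the next model by position.
    merged = quick_merge(pre, core)
    onnx.save(merged, "gpt2_end_to_end.onnx")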
File renamed without changes.
45 changes: 23 additions & 22 deletions operators/tokenizer/sentencepiece_tokenizer.cc
@@ -83,32 +83,33 @@ OrtStatusPtr KernelSentencepieceTokenizer::Compute(const ortc::Tensor<std::strin
content.push_back(tokenizer_.eos_id());
token_indices.push_back(ort_extensions::narrow<int32_t>(str_input[i].length()));
}

if (fairseq.has_value() && (*fairseq)) {
// HF Fairseq Example (XLMRobertaTokenizer) : https://huggingface.co/transformers/v4.6.0/_modules/transformers/models/xlm_roberta/tokenization_xlm_roberta.html#XLMRobertaTokenizer
//
// Original fairseq vocab and spm vocab must be "aligned":
// Vocab | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
// -------- | ------- | ------- | ------ | ------- | --- | --- | --- | ----- | ----- | ----
// fairseq | '<s>' | '<pad>' | '</s>' | '<unk>' | ',' | '.' | '▁' | 's' | '▁de' | '-'
// spm | '<unk>' | '<s>' | '</s>' | ',' | '.' | '▁' | 's' | '▁de' | '-' | '▁a'
//
// As per HF, the first "real" token "," has position 4 in the XLMRobertaTokenizer vocab and position
// 3 in the SPM vocab, so we add a padding value of 1 to IDs, and fix exceptions for '<unk>' and '<s>'.
std::for_each(content.begin(), content.end(), [](int& n) {
if (n == 0) { // '<unk>': 0 -> 3
n = 3;
} else if (n == 1) { // '<s>': 1 -> 0
n = 0;
} else if (n != 2) { // '</s>': 2 -> 2, '<*>': x -> x + 1
n++;
}
});
}
}
}
instance_indices.push_back(content.size());

// Patch fairseq indices
if (fairseq.has_value() && (*fairseq) && !add_rev) {
// HF Fairseq Example (XLMRobertaTokenizer) : https://huggingface.co/transformers/v4.6.0/_modules/transformers/models/xlm_roberta/tokenization_xlm_roberta.html#XLMRobertaTokenizer
//
// Original fairseq vocab and spm vocab must be "aligned":
// Vocab | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
// -------- | ------- | ------- | ------ | ------- | --- | --- | --- | ----- | ----- | ----
// fairseq | '<s>' | '<pad>' | '</s>' | '<unk>' | ',' | '.' | '▁' | 's' | '▁de' | '-'
// spm | '<unk>' | '<s>' | '</s>' | ',' | '.' | '▁' | 's' | '▁de' | '-' | '▁a'
//
// As per HF, the first "real" token "," has position 4 in the XLMRobertaTokenizer vocab and position
// 3 in the SPM vocab, so we add a padding value of 1 to IDs, and fix exceptions for '<unk>' and '<s>'.
std::for_each(content.begin(), content.end(), [](int& n) {
if (n == 0) { // '<unk>': 0 -> 3
n = 3;
} else if (n == 1) { // '<s>': 1 -> 0
n = 0;
} else if (n != 2) { // '</s>': 2 -> 2, '<*>': x -> x + 1
n++;
}
});
}

// Setup output
std::vector<int64_t> size_content(1);
size_content[0] = content.size();
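The relocated fairseq patch applies a fixed remapping from SPM IDs to XLMRobertaTokenizer (fairseq-style) IDs; an equivalent Python sketch of that mapping, for illustration only:

    def spm_id_to_fairseq_id(n: int) -> int:
        # '<unk>' 0 -> 3, '<s>' 1 -> 0, '</s>' 2 -> 2, every other ID shifts by +1.
        if n == 0:
            return 3
        if n == 1:
            return 0
        if n == 2:
            return 2
        return n + 1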
1 change: 0 additions & 1 deletion setup.cfg
@@ -1,3 +1,2 @@
[build]
build_base = .scb
# debug = 1
Binary file added test/data/cuda/test_fastgelu.onnx
Binary file added test/data/cuda/test_fastgelu_f16.onnx
Binary file added test/data/cuda/test_negpos.onnx
3 changes: 2 additions & 1 deletion test/shared_test/test_kernel.hpp
@@ -52,6 +52,7 @@ void RunSession(Ort::Session& session_object,
void TestInference(Ort::Env& env, const ORTCHAR_T* model_uri,
const std::vector<TestValue>& inputs,
const std::vector<TestValue>& outputs,
OutputValidator output_validator = nullptr);
OutputValidator output_validator = nullptr,
void* cuda_compute_stream = nullptr);

void GetTensorMutableDataString(const OrtApi& api, const OrtValue* value, std::vector<std::string>& output);