From e2696603f45f5796f1c048aab33eef11aaeb2cdb Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Wed, 27 Nov 2024 16:25:02 +0800 Subject: [PATCH] Remove the examples force required torch 1.13.1 (#2074) * remove alexnet_fashion_mnist notebook Signed-off-by: chensuyue * remove rnnt in pytorch examples Signed-off-by: chensuyue --------- Signed-off-by: chensuyue --- examples/.config/model_params_pytorch.json | 7 - examples/README.md | 7 - .../pytorch/alexnet_fashion_mnist/License.txt | 7 - .../pytorch/alexnet_fashion_mnist/README.md | 251 -------------- .../inc_sample_for_pytorch.ipynb | 310 ------------------ .../alexnet_fashion_mnist/output/readme.txt | 1 - .../scripts/conda_set_env.sh | 8 - .../scripts/devcloud_setup_env.sh | 7 - .../scripts/pip_set_env.sh | 11 - .../scripts/python_src/alexnet.py | 130 -------- .../scripts/python_src/compare_perf.py | 82 ----- .../scripts/python_src/fashion_mnist.py | 28 -- .../scripts/python_src/inc_quantize_model.py | 70 ---- .../scripts/python_src/profiling_inc.py | 85 ----- .../python_src/train_alexnet_fashion_mnist.py | 21 -- .../scripts/requirements.txt | 7 - .../scripts/run_in_intel_devcloud.sh | 8 - .../scripts/run_jupyter.sh | 2 - .../scripts/run_sample.sh | 48 --- .../rnnt/quantization/ptq_dynamic/fx/QSL.py | 68 ---- .../quantization/ptq_dynamic/fx/README.md | 75 ----- .../ptq_dynamic/fx/accuracy_eval.py | 51 --- .../quantization/ptq_dynamic/fx/mlperf.conf | 65 ---- .../ptq_dynamic/fx/prepare_dataset.sh | 55 ---- .../ptq_dynamic/fx/prepare_loadgen.sh | 10 - .../ptq_dynamic/fx/pytorch/Dockerfile | 46 --- .../ptq_dynamic/fx/pytorch/LICENSE | 204 ------------ .../ptq_dynamic/fx/pytorch/configs/rnnt.toml | 77 ----- .../ptq_dynamic/fx/pytorch/dataset.py | 159 --------- .../ptq_dynamic/fx/pytorch/decoders.py | 121 ------- .../ptq_dynamic/fx/pytorch/helpers.py | 123 ------- .../ptq_dynamic/fx/pytorch/metrics.py | 67 ---- .../fx/pytorch/model_separable_rnnt.py | 214 ------------ .../ptq_dynamic/fx/pytorch/parts/features.py | 260 --------------- .../ptq_dynamic/fx/pytorch/parts/manifest.py | 176 ---------- .../ptq_dynamic/fx/pytorch/parts/segment.py | 170 ---------- .../ptq_dynamic/fx/pytorch/parts/text/LICENSE | 19 -- .../fx/pytorch/parts/text/__init__.py | 12 - .../fx/pytorch/parts/text/cleaners.py | 116 ------- .../fx/pytorch/parts/text/numbers.py | 101 ------ .../ptq_dynamic/fx/pytorch/preprocessing.py | 39 --- .../ptq_dynamic/fx/pytorch/rnn.py | 110 ------- .../fx/pytorch/scripts/docker/build.sh | 3 - .../fx/pytorch/scripts/docker/launch.sh | 32 -- .../pytorch/scripts/download_librispeech.sh | 28 -- .../fx/pytorch/scripts/evaluation.sh | 92 ------ .../fx/pytorch/scripts/inference.sh | 104 ------ .../fx/pytorch/scripts/inference_benchmark.sh | 84 ----- .../pytorch/scripts/preprocess_librispeech.sh | 51 --- .../ptq_dynamic/fx/pytorch/scripts/train.sh | 113 ------- .../fx/pytorch/scripts/train_benchmark.sh | 130 -------- .../ptq_dynamic/fx/pytorch/utils/__init__.py | 0 .../fx/pytorch/utils/convert_librispeech.py | 82 ----- .../fx/pytorch/utils/download_librispeech.py | 76 ----- .../fx/pytorch/utils/download_utils.py | 69 ---- .../fx/pytorch/utils/preprocessing_utils.py | 77 ----- .../ptq_dynamic/fx/pytorch_SUT.py | 104 ------ .../ptq_dynamic/fx/requirements.txt | 10 - .../rnnt/quantization/ptq_dynamic/fx/run.sh | 90 ----- .../ptq_dynamic/fx/run_benchmark.sh | 84 ----- .../quantization/ptq_dynamic/fx/run_quant.sh | 59 ---- .../quantization/ptq_dynamic/fx/run_tune.py | 151 --------- .../quantization/ptq_dynamic/fx/user.conf | 6 - 63 files changed, 4873 
deletions(-) delete mode 100644 examples/notebook/pytorch/alexnet_fashion_mnist/License.txt delete mode 100644 examples/notebook/pytorch/alexnet_fashion_mnist/README.md delete mode 100644 examples/notebook/pytorch/alexnet_fashion_mnist/inc_sample_for_pytorch.ipynb delete mode 100644 examples/notebook/pytorch/alexnet_fashion_mnist/output/readme.txt delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/conda_set_env.sh delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/devcloud_setup_env.sh delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/pip_set_env.sh delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/alexnet.py delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/compare_perf.py delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/fashion_mnist.py delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/inc_quantize_model.py delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/profiling_inc.py delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/train_alexnet_fashion_mnist.py delete mode 100644 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/requirements.txt delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_in_intel_devcloud.sh delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_jupyter.sh delete mode 100755 examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_sample.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/QSL.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/README.md delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/accuracy_eval.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/mlperf.conf delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_dataset.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_loadgen.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/Dockerfile delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/LICENSE delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/configs/rnnt.toml delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/dataset.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/decoders.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/helpers.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/metrics.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/model_separable_rnnt.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/features.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/manifest.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/segment.py delete mode 100644 
examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/LICENSE delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/__init__.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/cleaners.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/numbers.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/preprocessing.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/rnn.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/docker/build.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/docker/launch.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/download_librispeech.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/evaluation.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/inference.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/inference_benchmark.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/preprocess_librispeech.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/train.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/train_benchmark.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/__init__.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/convert_librispeech.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/download_librispeech.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/download_utils.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/preprocessing_utils.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch_SUT.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/requirements.txt delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_benchmark.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_quant.sh delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_tune.py delete mode 100644 examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/user.conf diff --git a/examples/.config/model_params_pytorch.json b/examples/.config/model_params_pytorch.json index 562b2d7b094..168c8125b28 100644 --- a/examples/.config/model_params_pytorch.json +++ b/examples/.config/model_params_pytorch.json @@ -316,13 +316,6 @@ "batch_size": 100, "main_script": "run.py" }, - "rnnt": { - "model_src_dir": "speech_recognition/rnnt/quantization/ptq_dynamic/fx", - "dataset_location": "/tf_dataset/pytorch/rnnt/convert_dataset/", - 
"input_model": "/tf_dataset/pytorch/rnnt/rnnt.pt", - "main_script": "run_tune.py", - "batch_size": 100 - }, "wav2vec2_dynamic":{ "model_src_dir": "speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx", "dataset_location": "/tf_dataset2/datasets/speech_data/LibriSpeech/test-clean", diff --git a/examples/README.md b/examples/README.md index 3f2b1e50ad3..d7b2c7ea88a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -486,12 +486,6 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Dynamic Quantization fx - - RNNT - Speech Recognition - Post-Training Dynamic Quantization - fx - BlendCNN Natural Language Processing @@ -1479,7 +1473,6 @@ Intel® Neural Compressor validated examples with multiple compression technique # Notebook Examples * [Performance of FP32 Vs. INT8 ResNet50 Model](/examples/notebook/perf_fp32_int8_tf): compare existed FP32 & INT8 ResNet50 model directly. -* [Intel® Neural Compressor Sample for PyTorch*](/examples/notebook/pytorch/alexnet_fashion_mnist): an End-To-End pipeline to build up a CNN model by PyTorch to recognize fashion image and speed up AI model by Intel® Neural Compressor. * [Intel® Neural Compressor Sample for TensorFlow*](/examples/notebook/tensorflow/alexnet_mnist): an End-To-End pipeline to build up a CNN model by TensorFlow to recognize handwriting number and speed up AI model by Intel® Neural Compressor. * [Accelerate VGG19 Inference on Intel® Gen4 Xeon® Sapphire Rapids](/examples/notebook/tensorflow/vgg19_ibean): an End-To-End pipeline to train VGG19 model by transfer learning based on pre-trained model from [TensorFlow Hub](https://tfhub.dev); quantize it by Intel® Neural Compressor on Intel® Gen4 Xeon® Sapphire Rapids. diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/License.txt b/examples/notebook/pytorch/alexnet_fashion_mnist/License.txt deleted file mode 100644 index 39ae87c3b0b..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/License.txt +++ /dev/null @@ -1,7 +0,0 @@ -Copyright Intel® Corporation - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/README.md b/examples/notebook/pytorch/alexnet_fashion_mnist/README.md deleted file mode 100644 index eaead4041b5..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/README.md +++ /dev/null @@ -1,251 +0,0 @@ -# Intel® Neural Compressor Sample for PyTorch* - - -## Background - -Low-precision inference can significantly speed up inference pipelines. 
This is achieved by converting an FP32 model to a quantized INT8 or BF16 model. Second Generation Intel® Xeon® Scalable Processors (and newer) have Intel® Deep Learning Boost technology, which provides dedicated silicon for speeding up INT8 and BF16 operations. - -Intel® Neural Compressor (INC for short) helps developers quantize models, thereby converting an FP32 model into lower precisions like INT8 and BF16. - -At the same time, Intel® Neural Compressor tunes the quantization method to reduce the accuracy loss, which is a big blocker for low-precision inference. - -Intel® Neural Compressor is packaged into the Intel® AI Analytics Toolkit and works with Intel® Optimization for PyTorch*. - -Please refer to the official website for detailed info and news: [https://github.com/intel/neural-compressor](https://github.com/intel/neural-compressor) - -## Introduction - -This sample is an End-To-End pipeline which demonstrates the usage specifics of the Intel® Neural Compressor. The pipeline does the following: - -1. Using PyTorch, **Train** an AlexNet model (CNN) on the Fashion-MNIST dataset. - -2. Using the Intel® Neural Compressor, **quantize** the FP32 PyTorch model file (.pth) to an INT8 model. - -3. **Compare** the inference performance of the FP32 and INT8 models. - - -The sample showcases AI inference performance optimizations delivered by: - -1. Intel® Deep Learning Boost - -2. Intel® Neural Compressor
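For orientation, the quantize step of this pipeline boils down to a single `fit` call. The sketch below is illustrative only, assuming the Intel® Neural Compressor 2.0 Python API; `Net`, `data_loader`, and `do_test_mod` are this sample's own helpers from scripts/python_src/alexnet.py, and the paths follow the sample's output layout:

```python
# Minimal sketch of the INT8 post-training quantization step, assuming the
# INC 2.0 API. `Net`, `data_loader`, and `do_test_mod` are the sample's own
# helpers; the eval_func is a simplified stand-in for the accuracy test.
import torch
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig

from alexnet import Net, data_loader, do_test_mod

fp32_model = Net()
fp32_model.load_state_dict(torch.load("../output/alexnet_mnist_fp32_mod.pth"))

_, test_loader = data_loader(batch_size=200)

conf = PostTrainingQuantConfig()  # default: post-training static quantization
q_model = quantization.fit(
    model=fp32_model,
    conf=conf,
    calib_dataloader=test_loader,                       # calibration data
    eval_func=lambda m: do_test_mod(m, test_loader)[1]  # returns accuracy
)
q_model.save("../output/alexnet_mnist_int8_mod")  # folder consumed later by load()
```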
 - -## Code - -|Function|Code|Input|Output| -|-|-|-|-| -|Train a CNN AlexNet model|train_alexnet_fashion_mnist.py|dataset: Fashion-MNIST|alexnet_mnist_fp32_mod.pth| -|Quantize the fp32 model file|inc_quantize_model.py|dataset: Fashion-MNIST<br>model: alexnet_mnist_fp32_mod.pth|folder: alexnet_mnist_int8_mod| -|Test performance|profiling_inc.py|alexnet_mnist_fp32_mod.pth<br>alexnet_mnist_int8_mod|32.json<br>8.json| -|Compare the performance|compare_perf.py|32.json<br>8.json|stdout/stderr<br>log file<br>fp32_int8_absolute.png<br>fp32_int8_times.png| - -**run_sample.sh** calls the above Python scripts to finish the demo.<br> -Bash scripts are placed in the 'scripts' directory.<br> -Python files are placed in the 'scripts/python_src' directory.<br> - - -## Hardware Environment - -This demo can be executed on any Intel CPU, but it's recommended to use 2nd Generation Intel® Xeon® Scalable Processors or newer, which include: - -1. AVX-512 instructions to speed up training & inference of AI models. - -2. Intel® Deep Learning Boost: Vector Neural Network Instructions (VNNI) & [Intel® AMX](https://www.intel.in/content/www/in/en/products/docs/accelerator-engines/advanced-matrix-extensions/overview.html) (Advanced Matrix Extensions) to accelerate AI/DL inference with INT8/BF16 models. - -3. Intel® DevCloud - -In case you don't have access to the latest Intel® Xeon® CPUs, you can use the Intel® DevCloud to run this sample.<br> -Intel® DevCloud offers free access to the newer Intel® hardware.<br> -To learn more about working with Intel® DevCloud, please refer to [Intel® DevCloud](https://devcloud.intel.com/oneapi/home/)
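Before choosing hardware (or a DevCloud node type), you can check which of these instruction sets a Linux machine actually exposes. A small convenience check, not part of the sample's scripts, that reads the feature flags from /proc/cpuinfo:

```python
# Convenience check (Linux only; not part of the sample's scripts): report
# whether the CPU advertises the ISA features discussed above.
with open("/proc/cpuinfo") as f:
    flags = set(f.read().split())

print("AVX-512 VNNI (Intel DL Boost):", "avx512_vnni" in flags)
print("Intel AMX (INT8/BF16 tiles):  ", ({"amx_tile", "amx_int8", "amx_bf16"} & flags) or "not found")
```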
 - - -## Running Environment - - -### Intel® DevCloud - - -#### Getting Started with Intel® DevCloud - -This article assumes you are familiar with the Intel® DevCloud environment. To learn more about working with Intel® DevCloud, please refer to [Intel® DevCloud](https://www.intel.com/content/www/us/en/developer/tools/devcloud/overview.html). -Specifically, this article assumes: - -1. You have an Intel® DevCloud account. -2. You are familiar with the usage of Intel® DevCloud, like logging in via an SSH client or using the Jupyter* lab interface. -3. You are familiar with Python and with AI model training and inference based on PyTorch*. - -#### Setup based on Intel® oneAPI AI Analytics Toolkit - -1. SSH to Intel® DevCloud, or open a terminal from the Jupyter notebook. - -2. Create the virtual environment **env_inc**: - -``` -cd neural-compressor/examples/notebook/pytorch/alexnet_fashion_mnist -chmod +x -R scripts/* -bash scripts/devcloud_setup_env.sh -``` -Note: If you are running this for the first time, it could take a while to download all the required packages. - -#### Run the Jupyter Notebook in Intel® DevCloud for oneAPI - -Open **inc_sample_for_pytorch.ipynb** in Jupyter Notebook. Follow the steps in the notebook to complete the sample. - - -#### Run in SSH Login Intel® DevCloud for oneAPI - -This demo is intended to show the performance acceleration provided by: -1. [Intel® VNNI](https://cdrdv2-public.intel.com/727804/dl-boost-product-overview.pdf) (Vector Neural Network Instructions). On Intel® DevCloud, choose a compute node with the property 'clx', 'icx', or 'spr'. These node types offer support for Intel® VNNI. -2. [Intel® AMX](https://www.intel.in/content/www/in/en/products/docs/accelerator-engines/advanced-matrix-extensions/overview.html) (Advanced Matrix Extensions). On Intel® DevCloud, choose a compute node with the property 'spr'. This node type offers support for Intel® AMX. - -##### Job Submit -``` -qsub scripts/run_in_intel_devcloud.sh -d `pwd` -l nodes=1:icx:ppn=2 -o output/ -e output/ -``` - -Note: You have to run the above command on the "login node". If you run it on a "compute node" by mistake, the system will throw an error message like the one below. -``` -qsub: submit error (Bad UID for job execution MSG=ruserok failed validating uXXXXX/uXXXXX from s001-n054.aidevcloud) -``` - -##### Check job status - -``` -qstat -a -``` - -Once the job execution completes (either successfully or with an error), look for the log files in the 'output' directory. Below are two log file names for reference: - -1. **run_in_intel_devcloud.sh.o28029** -2. **run_in_intel_devcloud.sh.e28029** - -##### Check Result - -##### Check Result in Log File - -``` -tail -23 `ls -lAtr run_in_intel_devcloud.sh.o* | tail -1 | awk '{print $9}'` - -``` -Or -Check the result in a log file, like **run_in_intel_devcloud.sh.o28029**: - -``` -!tail -23 run_in_intel_devcloud.sh.o1842253 - - -Model FP32 INT8 -throughput(fps) xxx.4982883964987 xxx.70552731285 -latency(ms) x.8339174329018104 x.128233714979522 -accuracy(%) 0.x799 0.x796 - -Save to fp32_int8_absolute.png - -Model FP32 INT8 -throughput_times 1 x.293824608282245 -latency_times 1 x.7509864932092611 -accuracy_times 1 0.x996938463108482 - -Save to fp32_int8_times.png -Please check the PNG files to see the performance! -This demo is finished successfully! -Thank you! - -######################################################################## -# End of output for job 1842253.v-qsvr-1.aidevcloud -# Date: Thu 27 Jan 2022 07:05:52 PM PST -######################################################################## - -... - -``` - -The output shows the performance and accuracy of the FP32 and INT8 models.
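The `*_times` rows in this log are plain ratios between the two JSON result files. A sketch of the arithmetic that compare_perf.py performs, assuming the 32.json and 8.json files that profiling_inc.py writes into the output directory (keys: accuracy, throughput, latency):

```python
# Sketch of the FP32-vs-INT8 comparison behind the log above, assuming the
# ../output/32.json and ../output/8.json files written by profiling_inc.py.
import json

def load_result(index):
    with open(f"../output/{index}.json") as f:
        return json.load(f)  # keys: accuracy, throughput, latency

fp32, int8 = load_result(32), load_result(8)
print("throughput_times", int8["throughput"] / fp32["throughput"])  # higher is better
print("latency_times   ", int8["latency"] / fp32["latency"])        # lower is better
print("accuracy_times  ", int8["accuracy"] / fp32["accuracy"])      # ~1.0 means no loss
```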
 - -##### Check Result in PNG file - -The demo saves the performance comparison as PNG files: fp32_int8_absolute.png, fp32_int8_times.png - -Copy the files from DevCloud to the host: - -``` -scp devcloud:~/xxx/*.png ./ -``` - - -### Customer Server - -Set up your own running environment on a local server or in the cloud (including Intel® DevCloud): - -#### Install by PyPi - -Create the virtual environment **pip_env_inc**: - -``` -bash scripts/pip_set_env.sh -``` -Activate it by: - -``` -source pip_env_inc/bin/activate -``` - -#### Install by Conda - -Create the virtual environment **env_inc**: - -``` -bash scripts/conda_set_env.sh -``` - -Activate it by: - -``` -conda activate env_inc -``` - -#### Run by SSH - -``` -bash scripts/run_sample.sh -``` - -1. Check the result in the screen printout: -``` -... - -Model FP32 INT8 -throughput(fps) xxx.4982883964987 xxx.70552731285 -latency(ms) x.8339174329018104 x.128233714979522 -accuracy(%) 0.x799 0.x796 - -Save to fp32_int8_absolute.png - -Model FP32 INT8 -throughput_times 1 x.293824608282245 -latency_times 1 x.7509864932092611 -accuracy_times 1 x.9996938463108482 - -Save to fp32_int8_times.png -Please check the PNG files to see the performance! -This demo is finished successfully! -Thank you! -... - -``` -You will see the performance and accuracy of the FP32 and INT8 models. The performance gain should be obvious when running on a Xeon processor with VNNI. - -2. Check Result in PNG file - -The demo creates the figure files fp32_int8_absolute.png and fp32_int8_times.png to show the performance comparison as bar charts. They can be used in reports. - -#### Run by Jupyter Notebook - -Please open **inc_sample_for_pytorch.ipynb** in Jupyter Notebook. - -Follow the guide in the chapter **Run in Customer Server or Cloud** to run this demo. - -## License - -Code samples are licensed under the MIT license. See -[License.txt](License.txt) for details. \ No newline at end of file diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/inc_sample_for_pytorch.ipynb b/examples/notebook/pytorch/alexnet_fashion_mnist/inc_sample_for_pytorch.ipynb deleted file mode 100644 index 8783955ab03..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/inc_sample_for_pytorch.ipynb +++ /dev/null @@ -1,310 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "e37ea7a6", - "metadata": {}, - "source": [ - "# Intel® Neural Compressor Sample for PyTorch*" - ] - }, - { - "cell_type": "markdown", - "id": "5fda805e-a405-4fa4-9a11-d890206a27d7", - "metadata": { - "tags": [] - }, - "source": [ - "## Introduction\n", - "\n", - "This sample is an End-To-End pipeline which demonstrates the usage specifics of the Intel® Neural Compressor. The pipeline does the following:\n", - "\n", - "1. Using PyTorch, **Train** an AlexNet model (CNN) on the Fashion-MNIST dataset.\n", - "\n", - "2. Using the Intel® Neural Compressor, **quantize** the FP32 PyTorch model file (.pth) to an INT8 model.\n", - "\n", - "3. **Compare** the inference performance of the FP32 and INT8 models." - ] - }, - { - "cell_type": "markdown", - "id": "e538cd95-f291-41aa-9b48-00956855aec1", - "metadata": { - "tags": [] - }, - "source": [ - "## Code\n", - "Please refer to [README.md](README.md)."
- ] - }, - { - "cell_type": "markdown", - "id": "71858ff2-c7b6-425e-a7c4-eff227cc481e", - "metadata": {}, - "source": [ - "## Prepare Running Environment\n", - "\n", - "Please refer to [README.md](README.md)." - ] - }, - { - "cell_type": "markdown", - "id": "735982ec-2398-479b-a927-01d7e9f30ea1", - "metadata": {}, - "source": [ - "### Remove all old output files (Optional)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5c7ca46e-0fc8-4818-ac57-d354414ee6d4", - "metadata": {}, - "outputs": [], - "source": [ - "# !!! CAREFUL !!!, this will delete output data from your previous runs\n", - "!rm -rf output/*" - ] - }, - { - "cell_type": "markdown", - "id": "82f09276", - "metadata": {}, - "source": [ - "## Run in Intel® DevCloud\n", - "\n", - "Submit the job to a compute node with the property 'clx', 'icx', or 'spr', which supports Intel® Deep Learning Boost (avx512_vnni)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f51bc091", - "metadata": {}, - "outputs": [], - "source": [ - "!qsub scripts/run_in_intel_devcloud.sh -d `pwd` -l nodes=1:icx:ppn=2 -o output/ -e output/" - ] - }, - { - "cell_type": "markdown", - "id": "5f0d7cab-1b60-4689-b153-506e5818b811", - "metadata": {}, - "source": [ - "Check job status" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7199754d-e7e4-4e52-868d-0a1ca79cb064", - "metadata": {}, - "outputs": [], - "source": [ - "!qstat -a" - ] - }, - { - "cell_type": "markdown", - "id": "4bc5b605-47d1-485f-bfb1-cd7ab9f3f83c", - "metadata": {}, - "source": [ - "### Check Result\n", - "\n", - "#### Check Result in Log File\n", - "Check the latest created log file with prefix: **run_in_intel_devcloud.sh.o**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6b36c9c-f612-4517-914c-d5ca6ee92d27", - "metadata": {}, - "outputs": [], - "source": [ - "!tail -23 `ls -lAtr output/run_in_intel_devcloud.sh.o* | tail -1 | awk '{print $9}'`" - ] - }, - { - "cell_type": "markdown", - "id": "9a6a6651-cb8f-4667-9ddf-3b215ad35a00", - "metadata": {}, - "source": [ - "Check the error logs with prefix: **run_in_intel_devcloud.sh.e**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7b69a349-0d84-4a25-af24-7fc6fd8a64c8", - "metadata": {}, - "outputs": [], - "source": [ - "!tail -40 `ls -lAtr output/run_in_intel_devcloud.sh.e* | tail -1 | awk '{print $9}'`" - ] - }, - { - "cell_type": "markdown", - "id": "a80934c4-8ddd-48c3-acc5-63dc0bb1372a", - "metadata": {}, - "source": [ - "#### Check Result in PNG file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "44c31db2", - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import Image, display\n", - "\n", - "listOfImageNames = ['output/fp32_int8_absolute.png',\n", - " 'output/fp32_int8_times.png']\n", - "\n", - "for imageName in listOfImageNames:\n", - " display(Image(filename=imageName))" - ] - }, - { - "cell_type": "markdown", - "id": "0b4cded5-3723-42e5-aec1-8ec514ccd49e", - "metadata": {}, - "source": [ - "## Run in Customer Server or Cloud\n", - "\n", - "Note: it's recommended to use 2nd Generation Intel® Xeon® Scalable Processors or newer to get a better performance improvement.\n", - "\n", - "### Run in Jupyter Notebook\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "741279c7-f788-47f1-ab9a-8f0628a79d16", - "metadata": {}, - "outputs": [], - "source": [ - "!cd scripts && bash run_sample.sh" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d53543c9", - "metadata": 
{}, - "outputs": [], - "source": [ - "!ls" - ] - }, - { - "cell_type": "markdown", - "id": "b3cb8011-31c4-4a7c-be00-775d2ec940f4", - "metadata": {}, - "source": [ - "### Check Result\n", - "\n", - "#### Check Result in Screen Output\n", - "\n", - "Example:\n", - "\n", - "```\n", - "...\n", - "\n", - "Compare the Performance of FP32 and INT8 Models\n", - "Model FP32 INT8 \n", - "throughput(fps) xxx.4982883964987 xxx.52236638019 \n", - "latency(ms) x.8339174329018104 x.9863116497896156 \n", - "accuracy(%) 0.x799 0.x796 \n", - "\n", - "Save to fp32_int8_absolute.png\n", - "\n", - "Model FP32 INT8 \n", - "throughput_times 1 x.621889936815179 \n", - "latency_times 1 0.x009066766478504 \n", - "accuracy_diff(%) 0 -0.x29999999999986926 \n", - "\n", - "Save to fp32_int8_times.png\n", - "Check the output PNG files for performance comparison!\n", - "Demo execution completed successfully! Check output directory for results.\n", - "Thank you!\n", - "...\n", - "\n", - "```\n", - "#### Check Result in PNG file\n", - "\n", - "The demo creates the figure files fp32_int8_absolute.png and fp32_int8_times.png to show the performance comparison as bar charts. They can be used in reports." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "82c4f0b7-2451-41db-bd84-0fc26e74aab2", - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import Image, display\n", - "\n", - "listOfImageNames = ['fp32_int8_absolute.png',\n", - " 'fp32_int8_times.png']\n", - "\n", - "for imageName in listOfImageNames:\n", - " display(Image(filename=imageName))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f882b021-190d-438e-9cc8-f76b501c6be5", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (Intel® oneAPI 2023.0)", - "language": "python", - "name": "c009-intel_distribution_of_python_3_oneapi-beta05-python" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.15" - }, - "nbTranslate": { - "displayLangs": [ - "*" - ], - "hotkey": "alt-t", - "langInMainMenu": true, - "sourceLang": "en", - "targetLang": "fr", - "useGoogleTranslate": true - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/output/readme.txt b/examples/notebook/pytorch/alexnet_fashion_mnist/output/readme.txt deleted file mode 100644 index 902478c2a45..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/output/readme.txt +++ /dev/null @@ -1 +0,0 @@ -This directory will hold all the output files generated while running this Intel Neural Compressor sample.
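A note on consuming the artifacts above: the quantized model is saved as a folder, not a single .pth file, and the sample's alexnet.py (below) restores it through INC's PyTorch load helper. A minimal sketch of that pattern, assuming the sample's Net definition and output layout:

```python
# Sketch: restoring the INT8 model folder produced by inc_quantize_model.py,
# assuming neural_compressor 2.0 and the Net definition from alexnet.py.
import torch
from neural_compressor.utils.pytorch import load

from alexnet import Net

int8_model = load("../output/alexnet_mnist_int8_mod", Net())  # rebuild quantized modules
int8_model.eval()
with torch.no_grad():
    logits = int8_model(torch.randn(1, 1, 28, 28))  # Fashion-MNIST-shaped dummy batch
print(logits.shape)
```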
diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/conda_set_env.sh b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/conda_set_env.sh deleted file mode 100755 index c82a5b54b97..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/conda_set_env.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -ENV_NAME=env_inc -conda deactivate -conda env remove -n $ENV_NAME -conda create -n $ENV_NAME python=3.9 pip pytorch=1.13.1 torchvision=0.14.1 torchaudio=0.13.1 cpuonly neural-compressor=2.0 matplotlib jupyter -y -c conda-forge -c pytorch -c intel -conda activate $ENV_NAME -python -m ipykernel install --user --name $ENV_NAME \ No newline at end of file diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/devcloud_setup_env.sh b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/devcloud_setup_env.sh deleted file mode 100755 index ded2bccac4c..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/devcloud_setup_env.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -source /opt/intel/oneapi/setvars.sh -ENV_NAME=env_inc -conda deactivate -conda env remove -n $ENV_NAME -conda create -n $ENV_NAME python=3.9 pip pytorch=1.13.1 torchvision=0.14.1 torchaudio=0.13.1 cpuonly neural-compressor=2.0 matplotlib -y -c conda-forge -c pytorch -c intel diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/pip_set_env.sh b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/pip_set_env.sh deleted file mode 100755 index 735f07788e9..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/pip_set_env.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -ENV_NAME=pip_env_inc -deactivate -rm -rf $ENV_NAME -python3 -m venv $ENV_NAME -source $ENV_NAME/bin/activate -pip3 install --upgrade pip -pip3 install -r requirements.txt - - diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/alexnet.py b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/alexnet.py deleted file mode 100755 index 42ded6d333f..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/alexnet.py +++ /dev/null @@ -1,130 +0,0 @@ -import os -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -from torchvision import datasets, transforms -from torch.optim.lr_scheduler import StepLR -from neural_compressor.utils.pytorch import load -import fashion_mnist - - -class Net(nn.Module): - def __init__(self, num_classes = 10, dropout: float = 0.5) -> None: - super().__init__() - self.features = nn.Sequential( - nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=2), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=2, stride=2), - nn.Conv2d(96, 256, kernel_size=5, padding=2), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=2, stride=2), - nn.Conv2d(256, 384, kernel_size=3, padding=1), - nn.ReLU(inplace=True), - nn.Conv2d(384, 384, kernel_size=3, padding=1), - nn.ReLU(inplace=True), - nn.Conv2d(384, 256, kernel_size=3, padding=1), - nn.ReLU(inplace=True) - ) - self.classifier = nn.Sequential( - nn.Linear(256, num_classes), - nn.LogSoftmax(dim=1) - ) - - - def forward(self, x: torch.Tensor) -> torch.Tensor: - x = self.features(x) - x = torch.flatten(x, 1) - x = self.classifier(x) - return x - -def save_mod(model, model_file): - print('Save to {}'.format(model_file)) - torch.save(model.state_dict(), model_file) - -def load_mod(model_file): - model = Net() - model.load_state_dict(torch.load(model_file)) - 
print('Load from {}'.format(model_file)) - return model - -def load_int8_mod(model_folder): - model = Net() - int8_model = load(model_folder, model) - print('Load from {}'.format(model_folder)) - return int8_model - -def data_loader(batch_size=200): - train_loader, test_loader = fashion_mnist.data_loader(batch_size) - return train_loader, test_loader - -def do_test_mod(model, test_loader): - test_loss = 0 - accuracy = 0 - - with torch.no_grad(): - model.eval() - for images, labels in test_loader: - log_ps = model(images) - test_loss += F.nll_loss(log_ps, labels) - - ps = torch.exp(log_ps) - top_p, top_class = ps.topk(1, dim = 1) - equals = top_class == labels.view(*top_class.shape) - accuracy += torch.mean(equals.type(torch.FloatTensor)) - accuracy = (accuracy/len(test_loader)).numpy().item() - test_loss = (test_loss/len(test_loader)).numpy().item() - - return test_loss, accuracy - -def test_mod(model, test_loader): - print("Testing ...") - test_loss, accuracy = do_test_mod(model, test_loader) - print("Test loss: {:.3f}..".format(test_loss), - "Test Accuracy: {:.3f}".format(accuracy)) - - -def train_mod(model, train_loader, test_loader, optimizer, epochs=3): - print("Training ...") - model.train() - running_loss = 0 - train_len = len(train_loader) - - for epoch in range(1, epochs + 1): - for i, (images, labels) in enumerate(train_loader): - optimizer.zero_grad() - output = model.forward(images) - loss = F.nll_loss(output, labels) - loss.backward() - optimizer.step() - running_loss += loss.item() - - print("Epoch {}/{} Iteration {}/{} Loss {:.6f}".format(epoch, epochs, i, train_len, \ - running_loss/(i+1)), end='\r') - - - test_loss, accuracy = do_test_mod(model, test_loader) - - print('\nTrain Epoch: {} Epoch {} Samples \tLoss: {:.6f} Test Loss: {:.6f} Accuracy: {:.6f}'.format( - epoch, len(train_loader.sampler), - running_loss/len(train_loader), test_loss, accuracy)) - print("Done") - -def main(): - train_loader, test_loader = data_loader() - - model = Net() - print(model) - - optimizer = optim.Adam(model.parameters(), lr = 0.002) - - epochs = 1 - train_mod(model, train_loader, test_loader, optimizer, epochs) - test_mod(model, test_loader) - - save_mod(model, "alexnet_mnist_fp32_mod.pth") - -if __name__ == "__main__": - main() diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/compare_perf.py b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/compare_perf.py deleted file mode 100755 index d5f83e1aa35..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/compare_perf.py +++ /dev/null @@ -1,82 +0,0 @@ -import json -import matplotlib.pyplot as plt - - -def autolabel(ax, rects): - """ - Attach a text label above each bar displaying its height - """ - for rect in rects: - height = rect.get_height() - ax.text(rect.get_x() + rect.get_width()/2., 1.05*height, - '%0.4f' % float(height), - ha='center', va='bottom') - -def draw_bar(x, t, y, subplot, color, x_lab, y_lab, width=0.2): - plt.subplot(subplot) - plt.xticks(x, t) - ax1 = plt.gca() - ax1.set_xlabel(x_lab) - ax1.set_ylabel(y_lab, color=color) - rects1 = ax1.bar(x, y, color=color, width=width) - ax1.tick_params(axis='y', labelcolor=color) - autolabel(ax1, rects1) - -def fix_len(name, length): - if len(name) < length: - return name + " " * (length - len(name)) - return name[:length] - -... - -def compare_ver(src, dst): - src_ver = ver2int(src) - dst_ver = ver2int(dst) - if src_ver > dst_ver: - return 1 - if src_ver < dst_ver: - return -1 - return 0 - -... - - if compare_ver(inc.__version__, "2.0") >= 0: - print(f"Compatible Intel Neural Compressor version detected : v{inc.__version__} ") - else: - raise Exception(f"Installed Intel Neural Compressor version[v{inc.__version__}] is NOT compatible. 
Please upgrade to version 2.0 or higher.") - - q_model = auto_tune(fp32_model_file, batch_size) - q_model.save(int8_model) - print("Save int8 model to {}".format(int8_model)) - -if __name__ == "__main__": - main() diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/profiling_inc.py b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/profiling_inc.py deleted file mode 100755 index 93f5ac9d489..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/profiling_inc.py +++ /dev/null @@ -1,85 +0,0 @@ -import numpy as np -import time -import argparse -import os -import json -import torch - -import alexnet - - -def infer_perf(index, model_file): - train_loader, test_loader = alexnet.data_loader(batch_size=100) - - if index=='8': - model = alexnet.load_int8_mod(model_file) - else: - model = alexnet.load_mod(model_file) - - accuracy = 0 - test_loss, accuracy = alexnet.do_test_mod(model, test_loader) - print('accuracy:', accuracy) - - - throughput = 0 - times = 10 - warmup = 2 - infer_time = 0.0 - with torch.no_grad(): - model.eval() - for i in range(times): - bt = time.time() - for images, labels in test_loader: - log_ps = model(images) - et = time.time() - if i>=warmup: - infer_time += (et-bt) - - print("batch_size {}".format(test_loader.batch_size)) - # total samples in the timed passes divided by their total time - throughput = test_loader.batch_size * len(test_loader) * (times - warmup) / infer_time - print('max throughput(fps):', throughput) - - # latency when BS=1 - warmup = len(test_loader)*0.2 - bt = 0 - infer_time = 0.0 - train_loader, test_loader = alexnet.data_loader(batch_size=1) - - for i,(images, labels) in enumerate(test_loader): - bt = time.time() - log_ps = model(images) - et = time.time() - if i >= warmup: - infer_time += (et-bt) - - latency = infer_time * 1000 / (len(test_loader) - warmup) - print("run times {}".format(times-warmup)) - print('latency(ms):', latency) - - return accuracy, throughput, latency - - -def save_res(index, result): - accuracy, throughput, latency = result - res = {} - res['accuracy'] = accuracy - res['throughput'] = throughput - res['latency'] = latency - - outfile = "../output/" + str(index) + ".json" - with open(outfile, 'w') as f: - json.dump(res, f) - print("Save result to {}".format(outfile)) - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--index', type=str, help='file name of output', required=True) - - parser.add_argument('--input-graph', type=str, help='file name for graph', required=True) - - args = parser.parse_args() - - save_res(args.index, infer_perf(args.index, args.input_graph)) - -if __name__ == "__main__": - main() diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/train_alexnet_fashion_mnist.py b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/train_alexnet_fashion_mnist.py deleted file mode 100755 index fb25315a506..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/python_src/train_alexnet_fashion_mnist.py +++ /dev/null @@ -1,21 +0,0 @@ -import os -import numpy as np - -import torch.optim as optim - -import alexnet - -def main(): - train_loader, test_loader = alexnet.data_loader() - - model = alexnet.Net() - optimizer = optim.Adam(model.parameters(), lr = 0.002) - - epochs = 1 - alexnet.train_mod(model, train_loader, test_loader, optimizer, epochs) - alexnet.test_mod(model, test_loader) - - alexnet.save_mod(model, "../output/alexnet_mnist_fp32_mod.pth") - -if __name__ == "__main__": - main() diff --git 
a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/requirements.txt b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/requirements.txt deleted file mode 100644 index ef6bf8858e4..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ ---find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.13.1+cpu -torchvision==0.14.1+cpu -neural-compressor==2.0 -runipy -notebook -matplotlib \ No newline at end of file diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_in_intel_devcloud.sh b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_in_intel_devcloud.sh deleted file mode 100755 index 6195ba75e86..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_in_intel_devcloud.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -echo "Enable Conda Env." -source /opt/intel/oneapi/setvars.sh -conda activate env_inc -export PYTHONPATH=$(find $CONDA_PREFIX -type d -name "site-packages" | head -n 1) -cd scripts -bash run_sample.sh diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_jupyter.sh b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_jupyter.sh deleted file mode 100755 index d6f489e332c..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_jupyter.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -jupyter notebook --notebook-dir=./ --ip=0.0.0.0 --no-browser --allow-root & diff --git a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_sample.sh b/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_sample.sh deleted file mode 100755 index 07ee8b215ec..00000000000 --- a/examples/notebook/pytorch/alexnet_fashion_mnist/scripts/run_sample.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -echo "Note: This script relies on the relevant Python environment for its execution. Refer to README.md for more details." - -echo "Train the PyTorch AlexNet model on the Fashion-MNIST dataset" -python python_src/train_alexnet_fashion_mnist.py - -FP32_FILE="../output/alexnet_mnist_fp32_mod.pth" -if [ ! -f $FP32_FILE ]; then - echo "$FP32_FILE - model file does not exist" - echo "Model training failed, exiting! Check error logs for details" - exit 1 -else - echo "Model training has completed successfully" -fi - -echo "Quantize Model using Intel Neural Compressor" -python python_src/inc_quantize_model.py - -INT8_FOLDER="../output/alexnet_mnist_int8_mod" -if [ ! -d $INT8_FOLDER ]; then - echo "$INT8_FOLDER does not exist." - echo "Model quantization has failed, exiting! Check error logs for details" - exit 1 -else - echo "Model quantization has completed successfully" -fi - -echo "Execute profiling_inc.py with the FP32 model file" -python python_src/profiling_inc.py --input-graph=../output/alexnet_mnist_fp32_mod.pth --index=32 -echo "FP32 model performance test has completed successfully" - -echo "Execute profiling_inc.py with the INT8 model file" -python python_src/profiling_inc.py --input-graph=../output/alexnet_mnist_int8_mod --index=8 -echo "INT8 model performance test has completed successfully" - -echo "Comparing the Performance of FP32 and INT8 Models" -python python_src/compare_perf.py - -if [[ $? -eq 0 ]]; then - echo "Demo execution completed successfully! Check output directory for results." -else - echo "Demo execution has failed! Check error logs for more details." -fi - -echo "Check the output PNG files for performance comparison!" - -echo "Thank you!" 
-exit 0 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/QSL.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/QSL.py deleted file mode 100644 index 9c0abe4e734..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/QSL.py +++ /dev/null @@ -1,68 +0,0 @@ -import sys -import os -sys.path.insert(0, os.path.join(os.getcwd(), "pytorch")) - -from parts.manifest import Manifest -from parts.segment import AudioSegment - -import numpy as np - -import mlperf_loadgen as lg - - -class AudioQSL: - def __init__(self, dataset_dir, manifest_filepath, labels, - sample_rate=16000, perf_count=None): - m_paths = [manifest_filepath] - self.manifest = Manifest(dataset_dir, m_paths, labels, len(labels), - normalize=True, max_duration=15.0) - self.sample_rate = sample_rate - self.count = len(self.manifest) - perf_count = self.count if perf_count is None else perf_count - self.sample_id_to_sample = {} - self.qsl = lg.ConstructQSL(self.count, perf_count, - self.load_query_samples, - self.unload_query_samples) - print( - "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format( - self.manifest.duration / 3600, - self.manifest.filtered_duration / 3600, - self.count)) - - def load_query_samples(self, sample_list): - for sample_id in sample_list: - self.sample_id_to_sample[sample_id] = self._load_sample(sample_id) - - def unload_query_samples(self, sample_list): - for sample_id in sample_list: - del self.sample_id_to_sample[sample_id] - - def _load_sample(self, index): - sample = self.manifest[index] - segment = AudioSegment.from_file(sample['audio_filepath'][0], - target_sr=self.sample_rate) - waveform = segment.samples - assert isinstance(waveform, np.ndarray) and waveform.dtype == np.float32 - return waveform - - def __getitem__(self, index): - return self.sample_id_to_sample[index] - - def __del__(self): - lg.DestroyQSL(self.qsl) - print("Finished destroying QSL.") - -# We have no problem fitting all data in memory, so we do that, in -# order to speed up execution of the benchmark. -class AudioQSLInMemory(AudioQSL): - def __init__(self, dataset_dir, manifest_filepath, labels, - sample_rate=16000, perf_count=None): - super().__init__(dataset_dir, manifest_filepath, labels, - sample_rate, perf_count) - super().load_query_samples(range(self.count)) - - def load_query_samples(self, sample_list): - pass - - def unload_query_samples(self, sample_list): - pass diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/README.md b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/README.md deleted file mode 100644 index 9cc32ad3df5..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/README.md +++ /dev/null @@ -1,75 +0,0 @@ -Step-by-Step -============ - -This document lists the steps to reproduce the Intel Optimized PyTorch RNNT model tuning results via Neural Compressor. - -Our example comes from the MLPerf Inference Benchmark Suite. - - -# Prerequisite - -## 1. Environment - Python 3.6 or a higher version is recommended. - - ```shell - cd examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx - pip install -r requirements.txt - ``` - Check your gcc version with the command: **gcc -v** - - GCC 5 or above is required. - - ```shell - # install mlperf loadgen - bash prepare_loadgen.sh - ``` - -## 2. Prepare Dataset - - ```shell - bash prepare_dataset.sh --download_dir=origin_dataset --convert_dir=convert_dataset - ``` - - prepare_dataset.sh contains two stages: - - stage 1: download the LibriSpeech/dev-clean dataset and extract it. - - stage 2: convert the .flac files to .wav files - -## 3. Prepare Pre-trained Model - - ```shell - wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O rnnt.pt - ``` - -# Run - -## 1. Enable the RNNT example with the auto dynamic quantization strategy of Neural Compressor (a sketch of the underlying call is shown below). - - The changes made are as follows: - 1. pytorch_SUT.py: - Removed the jit script conversion. - 2. pytorch/decoders.py: - Removed the assertion of torch.jit.ScriptModule.
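For orientation, the auto dynamic quantization driven by run_tune.py reduces at its core to a `fit` call with a dynamic-approach config. A minimal sketch, assuming the INC 2.x API; `model` and `eval_func` are stand-ins for the loaded RNN-T module and the LoadGen-based accuracy function that run_tune.py wires up:

```python
# Minimal sketch of dynamic post-training quantization, assuming the INC 2.x
# API. `model` and `eval_func` are stand-ins supplied by run_tune.py: the
# loaded RNN-T module and a LoadGen-based accuracy function.
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig

def tune_dynamic(model, eval_func, output_dir="./saved_results"):
    conf = PostTrainingQuantConfig(approach="dynamic")  # no calibration data needed
    q_model = quantization.fit(model=model, conf=conf, eval_func=eval_func)
    q_model.save(output_dir)
    return q_model
```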
 -## 2. Tuning command: -```shell -bash run_quant.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --output_model=saved_results -``` -## 3. Benchmark command: -```shell -# fp32 -bash run_benchmark.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --mode=performance/accuracy --int8=false -# int8 -bash run_benchmark.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --mode=performance/accuracy --int8=true -``` -## 4. Brief output information: - -In each result below, the left value is the accuracy (percentage) and the right value is the time usage (seconds). - - - FP32 baseline is: [92.5477, 796.7552]. - - Tune 1 result is: [91.5872, 1202.2529] - - Tune 2 result is: [91.5894, 1201.3231] - - Tune 3 result is: [91.5195, 1211.5965] - - Tune 4 result is: [91.6030, 1218.2211] - - Tune 5 result is: [91.4812, 1169.5080] - - ... - diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/accuracy_eval.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/accuracy_eval.py deleted file mode 100644 index 4341900c536..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/accuracy_eval.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python - -import argparse -import array -import json -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "pytorch")) - -from helpers import process_evaluation_epoch, __gather_predictions -from parts.manifest import Manifest - -dtype_map = { - "int8": 'b', - "int16": 'h', - "int32": 'l', - "int64": 'q', -} - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--log_dir", required=True) - parser.add_argument("--dataset_dir", required=True) - parser.add_argument("--manifest", required=True) - parser.add_argument("--output_dtype", default="int64", choices=dtype_map.keys(), help="Output data type") - args = parser.parse_args() - return args - -def main(): - args = get_args() - labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] - manifest = Manifest(args.dataset_dir, [args.manifest], labels, len(labels), normalize=True, max_duration=15.0) - with open(os.path.join(args.log_dir, "mlperf_log_accuracy.json")) as fh: - results = json.load(fh) - hypotheses = [] - references = [] - for result in results: - hypotheses.append(array.array(dtype_map[args.output_dtype], bytes.fromhex(result["data"])).tolist()) - references.append(manifest[result["qsl_idx"]]["transcript"]) - - references = __gather_predictions([references], labels=labels) - hypotheses = __gather_predictions([hypotheses], labels=labels) - - d = dict(predictions=hypotheses, - transcripts=references) - wer = process_evaluation_epoch(d) - print("Word Error Rate: {:}%, accuracy={:}%".format(wer * 
100, (1 - wer) * 100)) - -if __name__ == '__main__': - main() diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/mlperf.conf b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/mlperf.conf deleted file mode 100644 index 7f5b55b58e2..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/mlperf.conf +++ /dev/null @@ -1,65 +0,0 @@ -# The format of this config file is 'key = value'. -# The key has the format 'model.scenario.key'. Value is mostly int64_t. -# Model maybe '*' as wildcard. In that case the value applies to all models. -# All times are in milli seconds - -# Set performance_sample_count for each model. -# User can optionally set this to higher values in user.conf. -mobilenet.*.performance_sample_count_override = 1024 -gnmt.*.performance_sample_count_override = 3903900 -resnet50.*.performance_sample_count_override = 1024 -ssd-mobilenet.*.performance_sample_count_override = 256 -ssd-resnet34.*.performance_sample_count_override = 64 -bert.*.performance_sample_count_override = 10833 -dlrm.*.performance_sample_count_override = 204800 -rnnt.*.performance_sample_count_override = 2513 -3d-unet.*.performance_sample_count_override = 16 - -# Set seeds. The seeds will be distributed two weeks before the submission. -*.*.qsl_rng_seed = 12786827339337101903 -*.*.sample_index_rng_seed = 12640797754436136668 -*.*.schedule_rng_seed = 3135815929913719677 - -*.SingleStream.target_latency_percentile = 90 -*.SingleStream.min_duration = 60000 -*.SingleStream.min_query_count = 1024 - -*.MultiStream.target_qps = 20 -*.MultiStream.target_latency_percentile = 99 -*.MultiStream.max_async_queries = 1 -*.MultiStream.target_latency = 50 -*.MultiStream.min_duration = 60000 -*.MultiStream.min_query_count = 270336 -ssd-resnet34.MultiStream.target_qps = 15 -ssd-resnet34.MultiStream.target_latency = 66 -gnmt.MultiStream.min_query_count = 90112 -gnmt.MultiStream.target_latency = 100 -gnmt.MultiStream.target_qps = 10 -gnmt.MultiStream.target_latency_percentile = 97 - -*.Server.target_latency = 10 -*.Server.target_latency_percentile = 99 -*.Server.target_duration = 0 -*.Server.min_duration = 60000 -*.Server.min_query_count = 270336 -resnet50.Server.target_latency = 15 -ssd-resnet34.Server.target_latency = 100 -gnmt.Server.min_query_count = 90112 -gnmt.Server.target_latency = 250 -gnmt.Server.target_latency_percentile = 97 -bert.Server.target_latency = 130 -dlrm.Server.target_latency = 30 -rnnt.Server.target_latency = 1000 - -*.Offline.target_latency_percentile = 90 -*.Offline.min_duration = 60000 -# In Offline scenario, we always have one query. But LoadGen maps this to -# min_sample_count internally in Offline scenario, so set this to 24576 since -# the rule requires that Offline scenario run for at least 24576 samples. -*.Offline.min_query_count = 24576 - -# These fields should be defined and overridden by user.conf. 
-*.SingleStream.target_latency = 10 -*.Server.target_qps = 1.0 -*.Offline.target_qps = 1.0 -*.MultiStream.samples_per_query = 4 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_dataset.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_dataset.sh deleted file mode 100644 index 2c517ee70ba..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_dataset.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - prepare_dataset - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --download_dir=*) - download_dir=$(echo $var |cut -f2 -d=) - ;; - --convert_dir=*) - convert_dir=$(echo $var |cut -f2 -d=) - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done - - mkdir -p $download_dir $convert_dir -} - -# prepare_dataset -function prepare_dataset { - # if you already have origin dataset, set stage=2, make sure to extract it \ - # and change the origin dataset path to your path - stage=1 - - # Download dataset - if [[ $stage -le 1 ]]; then - python pytorch/utils/download_librispeech.py \ - pytorch/utils/librispeech-inference.csv \ - $download_dir \ - -e $download_dir - fi - - # Convert dataset - if [[ $stage -le 2 ]]; then - python pytorch/utils/convert_librispeech.py \ - --input_dir $download_dir/LibriSpeech/dev-clean \ - --dest_dir $convert_dir/dev-clean-wav \ - --output_json $convert_dir/dev-clean-wav.json - fi -} - -main "$@" \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_loadgen.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_loadgen.sh deleted file mode 100644 index e30eea60442..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_loadgen.sh +++ /dev/null @@ -1,10 +0,0 @@ -pushd . -echo "Install loadgen" -git clone --recurse-submodules https://github.com/mlcommons/inference.git mlperf_inference -cd mlperf_inference -git checkout r2.1 -git log -1 -git submodule update --init --recursive -cd loadgen -CFLAGS="-std=c++14" python setup.py install -popd diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/Dockerfile b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/Dockerfile deleted file mode 100644 index 1cb52bf6261..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/Dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.09-py3 -FROM ${FROM_IMAGE_NAME} - - -RUN apt-get update && apt-get install -y libsndfile1 && apt-get install -y sox && rm -rf /var/lib/apt/lists/* - -RUN COMMIT_SHA=c6d12f9e1562833c2b4e7ad84cb22aa4ba31d18c && \ - git clone https://github.com/HawkAaron/warp-transducer deps/warp-transducer && \ - cd deps/warp-transducer && \ - git checkout $COMMIT_SHA && \ - mkdir build && \ - cd build && \ - cmake .. && \ - make VERBOSE=1 && \ - export CUDA_HOME="/usr/local/cuda" && \ - export WARP_RNNT_PATH=`pwd` && \ - export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME && \ - export LD_LIBRARY_PATH="$CUDA_HOME/extras/CUPTI/lib64:$LD_LIBRARY_PATH" && \ - export LIBRARY_PATH=$CUDA_HOME/lib64:$LIBRARY_PATH && \ - export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH && \ - export CFLAGS="-I$CUDA_HOME/include $CFLAGS" && \ - cd ../pytorch_binding && \ - python3 setup.py install --user && \ - rm -rf ../tests test ../tensorflow_binding && \ - cd ../../.. - -WORKDIR /workspace/jasper - -COPY requirements.txt . -RUN pip install --disable-pip-version-check -U -r requirements.txt - -COPY . . diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/LICENSE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/LICENSE deleted file mode 100644 index 75ee157cd96..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/LICENSE +++ /dev/null @@ -1,204 +0,0 @@ - Except where otherwise noted, the following license applies to all files in this repo. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2019 NVIDIA Corporation - Copyright 2019 Myrtle Software Limited, www.myrtle.ai - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/configs/rnnt.toml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/configs/rnnt.toml deleted file mode 100644 index a4cd1dfb470..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/configs/rnnt.toml +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model = "RNNT" - -[input] -normalize = "per_feature" -sample_rate = 16000 -window_size = 0.02 -window_stride = 0.01 -window = "hann" -features = 80 -n_fft = 512 -frame_splicing = 3 -dither = 0.00001 -feat_type = "logfbank" -normalize_transcripts = true -trim_silence = true -pad_to = 0 # TODO -max_duration = 16.7 -speed_perturbation = true - - -cutout_rect_regions = 0 -cutout_rect_time = 60 -cutout_rect_freq = 25 - - -cutout_x_regions = 2 -cutout_y_regions = 2 -cutout_x_width = 6 -cutout_y_width = 6 - - -[input_eval] -normalize = "per_feature" -sample_rate = 16000 -window_size = 0.02 -window_stride = 0.01 -window = "hann" -features = 80 -n_fft = 512 -frame_splicing = 3 -dither = 0.00001 -feat_type = "logfbank" -normalize_transcripts = true -trim_silence = true -pad_to = 0 - - -[rnnt] -rnn_type = "lstm" -encoder_n_hidden = 1024 -encoder_pre_rnn_layers = 2 -encoder_stack_time_factor = 2 -encoder_post_rnn_layers = 3 -pred_n_hidden = 320 -pred_rnn_layers = 2 -forget_gate_bias = 1.0 -joint_n_hidden = 512 -dropout=0.32 - - -[labels] -labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/dataset.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/dataset.py deleted file mode 100644 index 7b9036f1c55..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/dataset.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
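The `[input_eval]` and `[rnnt]` sections of this rnnt.toml fix the tensor sizes used downstream: 80 mel features spliced 3 frames at a time give a 240-wide encoder input, and the 28 labels plus one blank give 29 output classes. A sketch of deriving those numbers from the file (assumes the third-party `toml` package is installed):

```python
# Sketch: how rnnt.toml determines the encoder's input width and the
# joint network's class count. Assumes the 'toml' package.
import toml

cfg = toml.load("configs/rnnt.toml")
feat = cfg["input_eval"]
# 80 mel features, spliced 3 frames at a time -> 240-dim encoder input
in_features = feat["features"] * feat.get("frame_splicing", 1)
assert in_features == 80 * 3

# 28 characters + 1 blank symbol = 29 output classes for the joint network
num_classes = len(cfg["labels"]["labels"]) + 1
print(in_features, cfg["rnnt"]["encoder_n_hidden"], num_classes)  # 240 1024 29
```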
- -""" -This file contains classes and functions related to data loading -""" -from collections import namedtuple -import torch -import numpy as np -from torch.utils.data import Dataset -from parts.manifest import Manifest -from parts.features import WaveformFeaturizer - - -def seq_collate_fn(batch): - """batches samples and returns as tensors - Args: - batch : list of samples - Returns - batches of tensors - """ - audio_lengths = torch.LongTensor([sample.waveform.size(0) - for sample in batch]) - transcript_lengths = torch.LongTensor([sample.transcript.size(0) - for sample in batch]) - permute_indices = torch.argsort(audio_lengths, descending=True) - - audio_lengths = audio_lengths[permute_indices] - transcript_lengths = transcript_lengths[permute_indices] - padded_audio_signals = torch.nn.utils.rnn.pad_sequence( - [batch[i].waveform for i in permute_indices], - batch_first=True - ) - transcript_list = [batch[i].transcript - for i in permute_indices] - packed_transcripts = torch.nn.utils.rnn.pack_sequence(transcript_list, - enforce_sorted=False) - - # TODO: Don't I need to stop grad at some point now? - return (padded_audio_signals, audio_lengths, transcript_list, - packed_transcripts, transcript_lengths) - - -class AudioToTextDataLayer: - """Data layer with data loader - """ - - def __init__(self, **kwargs): - featurizer_config = kwargs['featurizer_config'] - pad_to_max = kwargs.get('pad_to_max', False) - perturb_config = kwargs.get('perturb_config', None) - manifest_filepath = kwargs['manifest_filepath'] - dataset_dir = kwargs['dataset_dir'] - labels = kwargs['labels'] - batch_size = kwargs['batch_size'] - drop_last = kwargs.get('drop_last', False) - shuffle = kwargs.get('shuffle', True) - min_duration = featurizer_config.get('min_duration', 0.1) - max_duration = featurizer_config.get('max_duration', None) - normalize_transcripts = kwargs.get('normalize_transcripts', True) - trim_silence = kwargs.get('trim_silence', False) - sampler_type = kwargs.get('sampler', 'default') - speed_perturbation = featurizer_config.get('speed_perturbation', False) - sort_by_duration = sampler_type == 'bucket' - self._featurizer = WaveformFeaturizer.from_config( - featurizer_config, perturbation_configs=perturb_config) - self._dataset = AudioDataset( - dataset_dir=dataset_dir, - manifest_filepath=manifest_filepath, - labels=labels, blank_index=len(labels), - sort_by_duration=sort_by_duration, - pad_to_max=pad_to_max, - featurizer=self._featurizer, max_duration=max_duration, - min_duration=min_duration, normalize=normalize_transcripts, - trim=trim_silence, speed_perturbation=speed_perturbation) - - print('sort_by_duration', sort_by_duration) - - self._dataloader = torch.utils.data.DataLoader( - dataset=self._dataset, - batch_size=batch_size, - collate_fn=lambda b: seq_collate_fn(b), - drop_last=drop_last, - shuffle=shuffle, - num_workers=0, - pin_memory=True, - sampler=None - ) - - def __len__(self): - return len(self._dataset) - - @property - def data_iterator(self): - return self._dataloader - - -class AudioDataset(Dataset): - def __init__(self, dataset_dir, manifest_filepath, labels, featurizer, max_duration=None, pad_to_max=False, - min_duration=None, blank_index=0, max_utts=0, normalize=True, sort_by_duration=False, - trim=False, speed_perturbation=False): - """Dataset that loads tensors via a json file containing paths to audio files, transcripts, and durations - (in seconds). Each entry is a different audio sample. 
- Args: - dataset_dir: absolute path to dataset folder - manifest_filepath: relative path from dataset folder to manifest json as described above. - labels: String containing all the possible characters to map to - featurizer: Initialized featurizer class that converts paths of audio to feature tensors - max_duration: If audio exceeds this length, do not include in dataset - min_duration: If audio is less than this length, do not include in dataset - pad_to_max: if specified input sequences into dnn model will be padded to max_duration - blank_index: blank index for ctc loss / decoder - max_utts: Limit number of utterances - normalize: whether to normalize transcript text - sort_by_duration: whether or not to sort sequences by increasing duration - trim: if specified trims leading and trailing silence from an audio signal. - speed_perturbation: specify if using data contains speed perburbation - """ - m_paths = [manifest_filepath] - self.manifest = Manifest(dataset_dir, m_paths, labels, blank_index, pad_to_max=pad_to_max, - max_duration=max_duration, - sort_by_duration=sort_by_duration, - min_duration=min_duration, max_utts=max_utts, - normalize=normalize, speed_perturbation=speed_perturbation) - self.featurizer = featurizer - self.blank_index = blank_index - self.trim = trim - print( - "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours.".format( - self.manifest.duration / 3600, - self.manifest.filtered_duration / 3600)) - - def __getitem__(self, index): - sample = self.manifest[index] - rn_indx = np.random.randint(len(sample['audio_filepath'])) - duration = sample['audio_duration'][rn_indx] if 'audio_duration' in sample else 0 - offset = sample['offset'] if 'offset' in sample else 0 - features = self.featurizer.process(sample['audio_filepath'][rn_indx], - offset=offset, duration=duration, - trim=self.trim) - - AudioSample = namedtuple('AudioSample', ['waveform', - 'transcript']) - return AudioSample(features, - torch.LongTensor(sample["transcript"])) - - def __len__(self): - return len(self.manifest) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/decoders.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/decoders.py deleted file mode 100644 index 81e6f650a58..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/decoders.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import List, Optional, Tuple - -import torch - -import torch.nn.functional as F -from model_separable_rnnt import label_collate - - -class ScriptGreedyDecoder(torch.nn.Module): - """A greedy transducer decoder. - - Args: - blank_symbol: See `Decoder`. - model: Model to use for prediction. 
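`seq_collate_fn` above sorts each batch by descending waveform length before padding, which is the layout packed-sequence RNN kernels expect. A toy run, assuming `seq_collate_fn` and its `AudioSample` layout from the dataset.py above are importable:

```python
# Toy check of the descending-length sort + padding done by seq_collate_fn.
import torch
from collections import namedtuple

AudioSample = namedtuple('AudioSample', ['waveform', 'transcript'])
batch = [AudioSample(torch.randn(50), torch.LongTensor([1, 2])),
         AudioSample(torch.randn(80), torch.LongTensor([3]))]

padded, lens, *_ = seq_collate_fn(batch)
print(lens)          # tensor([80, 50]) -- longest sample first
print(padded.shape)  # torch.Size([2, 80]) -- shorter waveform zero-padded
```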
- max_symbols_per_step: The maximum number of symbols that can be added - to a sequence in a single time step; if set to None then there is - no limit. - cutoff_prob: Skip to next step in search if current highest character - probability is less than this. - """ - - def __init__(self, blank_index, model, max_symbols_per_step=30): - super().__init__() - # assert isinstance(model, torch.jit.ScriptModule) - # assert not model.training - self.eval() - self._model = model - self._blank_id = blank_index - self._SOS = -1 - assert max_symbols_per_step > 0 - self._max_symbols_per_step = max_symbols_per_step - - @torch.jit.export - def forward(self, x: torch.Tensor, out_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, List[List[int]]]: - """Returns a list of sentences given an input batch. - - Args: - x: A tensor of size (batch, channels, features, seq_len) - TODO was (seq_len, batch, in_features). - out_lens: list of int representing the length of each sequence - output sequence. - - Returns: - list containing batch number of sentences (strings). - """ - # Apply optional preprocessing - logits, logits_lens = self._model.encoder(x, out_lens) - - output: List[List[int]] = [] - for batch_idx in range(logits.size(0)): - inseq = logits[batch_idx, :, :].unsqueeze(1) - # inseq: TxBxF - logitlen = logits_lens[batch_idx] - sentence = self._greedy_decode(inseq, logitlen) - output.append(sentence) - - return logits, logits_lens, output - - def _greedy_decode(self, x: torch.Tensor, out_len: torch.Tensor) -> List[int]: - hidden: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - label: List[int] = [] - for time_idx in range(int(out_len.item())): - f = x[time_idx, :, :].unsqueeze(0) - - not_blank = True - symbols_added = 0 - - while not_blank and symbols_added < self._max_symbols_per_step: - g, hidden_prime = self._pred_step( - self._get_last_symb(label), - hidden - ) - logp = self._joint_step(f, g, log_normalize=False)[0, :] - - # get index k, of max prob - v, k = logp.max(0) - k = k.item() - - if k == self._blank_id: - not_blank = False - else: - label.append(k) - hidden = hidden_prime - symbols_added += 1 - - return label - - def _pred_step(self, label: int, hidden: Optional[Tuple[torch.Tensor, torch.Tensor]]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - if label == self._SOS: - return self._model.prediction(None, hidden) - if label > self._blank_id: - label -= 1 - label = torch.tensor([[label]], dtype=torch.int64) - return self._model.prediction(label, hidden) - - def _joint_step(self, enc: torch.Tensor, pred: torch.Tensor, log_normalize: bool=False) -> torch.Tensor: - logits = self._model.joint(enc, pred)[:, 0, 0, :] - if not log_normalize: - return logits - - probs = F.log_softmax(logits, dim=len(logits.shape) - 1) - - return probs - - def _get_last_symb(self, labels: List[int]) -> int: - return self._SOS if len(labels) == 0 else labels[-1] diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/helpers.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/helpers.py deleted file mode 100644 index cfe3b66f3c8..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/helpers.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
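A subtle point in `_pred_step` above: the prediction network's embedding has only `vocab_size - 1` rows because the blank symbol is never embedded, so labels above the blank index are shifted down by one to keep the indices dense. A minimal check of that indexing rule (in this model the blank is last, index 28, so nothing actually moves):

```python
# The label-shift rule from _pred_step: labels above the blank index are
# compacted down by one, since the blank has no embedding row.
def shift_label(label, blank_id):
    return label - 1 if label > blank_id else label

# With this model's layout (blank_id = 28, characters 0..27) nothing moves:
assert [shift_label(k, 28) for k in (0, 5, 27)] == [0, 5, 27]
# With a blank in the middle (e.g. blank_id = 2), higher labels compact down:
assert [shift_label(k, 2) for k in (0, 1, 3, 4)] == [0, 1, 2, 3]
```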
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from enum import Enum -from metrics import word_error_rate - - -class Optimization(Enum): - """Various levels of Optimization. - WARNING: This might have effect on model accuracy.""" - nothing = 0 - mxprO0 = 1 - mxprO1 = 2 - mxprO2 = 3 - mxprO3 = 4 - - -AmpOptimizations = {Optimization.mxprO0: "O0", - Optimization.mxprO1: "O1", - Optimization.mxprO2: "O2", - Optimization.mxprO3: "O3"} - - -def add_blank_label(labels): - if not isinstance(labels, list): - raise ValueError("labels must be a list of symbols") - labels.append("") - return labels - - -def __rnnt_decoder_predictions_tensor(tensor, labels): - """ - Takes output of greedy rnnt decoder and converts to strings. - Args: - tensor: model output tensor - label: A list of labels - Returns: - prediction - """ - hypotheses = [] - labels_map = dict([(i, labels[i]) for i in range(len(labels))]) - # iterate over batch - for ind in range(len(tensor)): - hypothesis = ''.join([labels_map[c] for c in tensor[ind]]) - hypotheses.append(hypothesis) - return hypotheses - - -def __gather_predictions(predictions_list: list, labels: list) -> list: - results = [] - for prediction in predictions_list: - results += __rnnt_decoder_predictions_tensor(prediction, labels=labels) - return results - - -def __gather_transcripts(transcript_list: list, transcript_len_list: list, - labels: list) -> list: - results = [] - labels_map = dict([(i, labels[i]) for i in range(len(labels))]) - for i, t in enumerate(transcript_list): - target = t.numpy().tolist() - reference = ''.join([labels_map[c] for c in target]) - results.append(reference) - return results - - -def process_evaluation_batch(tensors: dict, global_vars: dict, labels: list): - """ - Processes results of an iteration and saves it in global_vars - Args: - tensors: dictionary with results of an evaluation iteration, e.g. 
loss, predictions, transcript, and output - global_vars: dictionary where processes results of iteration are saved - labels: A list of labels - """ - for kv, v in tensors.items(): - if kv.startswith('predictions'): - global_vars['predictions'] += __gather_predictions( - v, labels=labels) - elif kv.startswith('transcript_length'): - transcript_len_list = v - elif kv.startswith('transcript'): - transcript_list = v - - global_vars['transcripts'] += __gather_transcripts(transcript_list, - transcript_len_list, - labels=labels) - - -def process_evaluation_epoch(global_vars: dict, tag=None): - """ - Processes results from each worker at the end of evaluation and combine to final result - Args: - global_vars: dictionary containing information of entire evaluation - Return: - wer: final word error rate - loss: final loss - """ - hypotheses = global_vars['predictions'] - references = global_vars['transcripts'] - - wer, scores, num_words = word_error_rate( - hypotheses=hypotheses, references=references) - return wer - - -def print_dict(d): - maxLen = max([len(ii) for ii in d.keys()]) - fmtString = '\t%' + str(maxLen) + 's : %s' - print('Arguments:') - for keyPair in sorted(d.items()): - print(fmtString % keyPair) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/metrics.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/metrics.py deleted file mode 100644 index 5426e37237a..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/metrics.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import List - - -def __levenshtein(a: List, b: List) -> int: - """Calculates the Levenshtein distance between a and b. - """ - n, m = len(a), len(b) - if n > m: - # Make sure n <= m, to use O(min(n,m)) space - a, b = b, a - n, m = m, n - - current = list(range(n + 1)) - for i in range(1, m + 1): - previous, current = current, [i] + [0] * n - for j in range(1, n + 1): - add, delete = previous[j] + 1, current[j - 1] + 1 - change = previous[j - 1] - if a[j - 1] != b[i - 1]: - change = change + 1 - current[j] = min(add, delete, change) - - return current[n] - - -def word_error_rate(hypotheses: List[str], references: List[str]) -> float: - """ - Computes Average Word Error rate between two texts represented as - corresponding lists of string. Hypotheses and references must have same length. - - Args: - hypotheses: list of hypotheses - references: list of references - - Returns: - (float) average word error rate - """ - scores = 0 - words = 0 - if len(hypotheses) != len(references): - raise ValueError("In word error rate calculation, hypotheses and reference" - " lists must have the same number of elements. 
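`__rnnt_decoder_predictions_tensor` above is just an index-to-character lookup followed by a join. For one decoded hypothesis, using the label set from rnnt.toml:

```python
# What __rnnt_decoder_predictions_tensor does for one batch of decoder
# output: map label indices back to characters and join them.
labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
          "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y",
          "z", "'"]
labels_map = {i: labels[i] for i in range(len(labels))}

decoded = [[8, 5, 12, 12, 15]]  # one hypothesis in the batch
hypotheses = [''.join(labels_map[c] for c in seq) for seq in decoded]
print(hypotheses)               # ['hello']
```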
But I got:" - "{0} and {1} correspondingly".format(len(hypotheses), len(references))) - for h, r in zip(hypotheses, references): - h_list = h.split() - r_list = r.split() - words += len(r_list) - scores += __levenshtein(h_list, r_list) - if words != 0: - wer = (1.0 * scores) / words - else: - wer = float('inf') - return wer, scores, words diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/model_separable_rnnt.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/model_separable_rnnt.py deleted file mode 100644 index 68a0ed6b5e5..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/model_separable_rnnt.py +++ /dev/null @@ -1,214 +0,0 @@ -from typing import Optional, Tuple - -import numpy as np -import torch - -from rnn import rnn -from rnn import StackTime - - -class RNNT(torch.nn.Module): - def __init__(self, rnnt=None, num_classes=1, **kwargs): - super().__init__() - if kwargs.get("no_featurizer", False): - in_features = kwargs.get("in_features") - else: - feat_config = kwargs.get("feature_config") - # This may be useful in the future, for MLPerf - # configuration. - in_features = feat_config['features'] * \ - feat_config.get("frame_splicing", 1) - - self.encoder = Encoder(in_features, - rnnt["encoder_n_hidden"], - rnnt["encoder_pre_rnn_layers"], - rnnt["encoder_post_rnn_layers"], - rnnt["forget_gate_bias"], - None if "norm" not in rnnt else rnnt["norm"], - rnnt["rnn_type"], - rnnt["encoder_stack_time_factor"], - rnnt["dropout"], - ) - - self.prediction = Prediction( - num_classes, - rnnt["pred_n_hidden"], - rnnt["pred_rnn_layers"], - rnnt["forget_gate_bias"], - None if "norm" not in rnnt else rnnt["norm"], - rnnt["rnn_type"], - rnnt["dropout"], - ) - - self.joint = Joint( - num_classes, - rnnt["pred_n_hidden"], - rnnt["encoder_n_hidden"], - rnnt["joint_n_hidden"], - rnnt["dropout"], - ) - - def forward(self, x_padded: torch.Tensor, x_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - return self.encoder(x_padded, x_lens) - - -class Encoder(torch.nn.Module): - def __init__(self, in_features, encoder_n_hidden, - encoder_pre_rnn_layers, encoder_post_rnn_layers, - forget_gate_bias, norm, rnn_type, encoder_stack_time_factor, - dropout): - super().__init__() - self.pre_rnn = rnn( - rnn=rnn_type, - input_size=in_features, - hidden_size=encoder_n_hidden, - num_layers=encoder_pre_rnn_layers, - norm=norm, - forget_gate_bias=forget_gate_bias, - dropout=dropout, - ) - self.stack_time = StackTime(factor=encoder_stack_time_factor) - self.post_rnn = rnn( - rnn=rnn_type, - input_size=encoder_stack_time_factor * encoder_n_hidden, - hidden_size=encoder_n_hidden, - num_layers=encoder_post_rnn_layers, - norm=norm, - forget_gate_bias=forget_gate_bias, - norm_first_rnn=True, - dropout=dropout, - ) - - def forward(self, x_padded: torch.Tensor, x_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - x_padded, _ = self.pre_rnn(x_padded, None) - x_padded, x_lens = self.stack_time(x_padded, x_lens) - # (T, B, H) - x_padded, _ = self.post_rnn(x_padded, None) - # (B, T, H) - x_padded = x_padded.transpose(0, 1) - return x_padded, x_lens - -class Prediction(torch.nn.Module): - def __init__(self, vocab_size, n_hidden, pred_rnn_layers, - forget_gate_bias, norm, rnn_type, dropout): - super().__init__() - self.embed = torch.nn.Embedding(vocab_size - 1, n_hidden) - self.n_hidden = n_hidden - self.dec_rnn = rnn( - rnn=rnn_type, - input_size=n_hidden, - hidden_size=n_hidden, - 
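Note that `word_error_rate` above returns the `(wer, scores, words)` triple rather than the bare float its docstring suggests. A worked example, assuming the function from the metrics.py above is importable:

```python
# Worked example of word_error_rate: the hypothesis drops one of the four
# reference words, so WER = 1 edit / 4 reference words = 0.25.
hyps = ["the cat sat"]
refs = ["the black cat sat"]
wer, scores, words = word_error_rate(hypotheses=hyps, references=refs)
print(wer, scores, words)  # 0.25 1 4
```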
num_layers=pred_rnn_layers, - norm=norm, - forget_gate_bias=forget_gate_bias, - dropout=dropout, - ) - - def forward(self, y: Optional[torch.Tensor], - state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - """ - B - batch size - U - label length - H - Hidden dimension size - L - Number of decoder layers = 2 - - Args: - y: (B, U) - - Returns: - Tuple (g, hid) where: - g: (B, U + 1, H) - hid: (h, c) where h is the final sequence hidden state and c is - the final cell state: - h (tensor), shape (L, B, H) - c (tensor), shape (L, B, H) - """ - if y is None: - # This is gross. I should really just pass in an SOS token - # instead. Is there no SOS token? - assert state is None - # Hacky, no way to determine this right now! - B = 1 - y = torch.zeros((B, 1, self.n_hidden), dtype=torch.float32) - else: - y = self.embed(y) - - # if state is None: - # batch = y.size(0) - # state = [ - # (torch.zeros(batch, self.pred_n_hidden, dtype=y.dtype, device=y.device), - # torch.zeros(batch, self.pred_n_hidden, dtype=y.dtype, device=y.device)) - # for _ in range(self.pred_rnn_layers) - # ] - - y = y.transpose(0, 1) # .contiguous() # (U + 1, B, H) - g, hid = self.dec_rnn(y, state) - g = g.transpose(0, 1) # .contiguous() # (B, U + 1, H) - # del y, state - return g, hid - -class Joint(torch.nn.Module): - def __init__(self, vocab_size, pred_n_hidden, enc_n_hidden, - joint_n_hidden, dropout): - super().__init__() - layers = [ - torch.nn.Linear(pred_n_hidden + enc_n_hidden, joint_n_hidden), - torch.nn.ReLU(), - ] + ([torch.nn.Dropout(p=dropout), ] if dropout else []) + [ - torch.nn.Linear(joint_n_hidden, vocab_size) - ] - self.net = torch.nn.Sequential( - *layers - ) - - def forward(self, f: torch.Tensor, g: torch.Tensor): - """ - f should be shape (B, T, H) - g should be shape (B, U + 1, H) - - returns: - logits of shape (B, T, U, K + 1) - """ - # Combine the input states and the output states - B, T, H = f.shape - B, U_, H2 = g.shape - - f = f.unsqueeze(dim=2) # (B, T, 1, H) - f = f.expand((B, T, U_, H)) - - g = g.unsqueeze(dim=1) # (B, 1, U + 1, H) - g = g.expand((B, T, U_, H2)) - - inp = torch.cat([f, g], dim=3) # (B, T, U, 2H) - res = self.net(inp) - # del f, g, inp - return res - -def label_collate(labels): - """Collates the label inputs for the rnn-t prediction network. - - If `labels` is already in torch.Tensor form this is a no-op. - - Args: - labels: A torch.Tensor List of label indexes or a torch.Tensor. - - Returns: - A padded torch.Tensor of shape (batch, max_seq_len). - """ - - if isinstance(labels, torch.Tensor): - return labels.type(torch.int64) - if not isinstance(labels, (list, tuple)): - raise ValueError( - f"`labels` should be a list or tensor not {type(labels)}" - ) - - batch_size = len(labels) - max_len = max(len(l) for l in labels) - - cat_labels = np.full((batch_size, max_len), fill_value=0.0, dtype=np.int32) - for e, l in enumerate(labels): - cat_labels[e, :len(l)] = l - labels = torch.LongTensor(cat_labels) - - return labels diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/features.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/features.py deleted file mode 100644 index 5a1309758eb..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/features.py +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. 
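`Joint.forward` above broadcasts encoder frames `f` of shape (B, T, H_enc) against prediction outputs `g` of shape (B, U + 1, H_pred), producing one joint-network input per (time step, label position) pair. A shape walk-through of that broadcast with the hidden sizes from rnnt.toml:

```python
# Shape walk-through of Joint.forward's expand-and-concat step.
import torch

B, T, U_ = 2, 100, 21
enc_h, pred_h = 1024, 320  # encoder_n_hidden, pred_n_hidden in rnnt.toml
f = torch.randn(B, T, enc_h)
g = torch.randn(B, U_, pred_h)

f = f.unsqueeze(2).expand(B, T, U_, enc_h)   # (B, T, U+1, enc_h)
g = g.unsqueeze(1).expand(B, T, U_, pred_h)  # (B, T, U+1, pred_h)
inp = torch.cat([f, g], dim=3)               # (B, T, U+1, enc_h + pred_h)
print(inp.shape)  # torch.Size([2, 100, 21, 1344]) -> fed to the joint MLP
```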
All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Tuple - -import torch -import torch.nn as nn -import math -import librosa -from .segment import AudioSegment - - -class WaveformFeaturizer(object): - def __init__(self, input_cfg): - self.cfg = input_cfg - - def process(self, file_path, offset=0, duration=0, trim=False): - audio = AudioSegment.from_file(file_path, - target_sr=self.cfg['sample_rate'], - int_values=self.cfg.get( - 'int_values', False), - offset=offset, duration=duration, trim=trim) - return self.process_segment(audio) - - def process_segment(self, audio_segment): - return torch.tensor(audio_segment.samples, dtype=torch.float) - - @classmethod - def from_config(cls, input_config, perturbation_configs=None): - return cls(input_config) - - -constant = 1e-5 - - -def normalize_batch(x, seq_len, normalize_type): - if normalize_type == "per_feature": - x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - x_std = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - for i in range(x.shape[0]): - x_mean[i, :] = x[i, :, :seq_len[i]].mean(dim=1) - x_std[i, :] = x[i, :, :seq_len[i]].std(dim=1) - # make sure x_std is not zero - x_std += constant - return (x - x_mean.unsqueeze(2)) / x_std.unsqueeze(2) - elif normalize_type == "all_features": - x_mean = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - x_std = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - for i in range(x.shape[0]): - x_mean[i] = x[i, :, :seq_len[i].item()].mean() - x_std[i] = x[i, :, :seq_len[i].item()].std() - # make sure x_std is not zero - x_std += constant - return (x - x_mean.view(-1, 1, 1)) / x_std.view(-1, 1, 1) - else: - return x - - -def splice_frames(x, frame_splicing): - """ Stacks frames together across feature dim - - input is batch_size, feature_dim, num_frames - output is batch_size, feature_dim*frame_splicing, num_frames - - """ - seq = [x] - for n in range(1, frame_splicing): - tmp = torch.zeros_like(x) - tmp[:, :, :-n] = x[:, :, n:] - seq.append(tmp) - return torch.cat(seq, dim=1)[:, :, ::frame_splicing] - - -class FilterbankFeatures(nn.Module): - def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01, - window="hamming", normalize="per_feature", n_fft=None, - preemph=0.97, - nfilt=64, lowfreq=0, highfreq=None, log=True, dither=constant, - pad_to=8, - max_duration=16.7, - frame_splicing=1): - super(FilterbankFeatures, self).__init__() -# print("PADDING: {}".format(pad_to)) - - torch_windows = { - 'hann': torch.hann_window, - 'hamming': torch.hamming_window, - 'blackman': torch.blackman_window, - 'bartlett': torch.bartlett_window, - 'none': None, - } - - self.win_length = int(sample_rate * window_size) # frame size - self.hop_length = int(sample_rate * window_stride) - self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length)) - - self.normalize = normalize - self.log = log - self.dither = dither - self.frame_splicing = frame_splicing - self.nfilt = nfilt - 
self.preemph = preemph - self.pad_to = pad_to - # For now, always enable this. - # See https://docs.google.com/presentation/d/1IVC3J-pHB-ipJpKsJox_SqmDHYdkIaoCXTbKmJmV2-I/edit?usp=sharing for elaboration - self.use_deterministic_dithering = True - highfreq = highfreq or sample_rate / 2 - window_fn = torch_windows.get(window, None) - window_tensor = window_fn(self.win_length, - periodic=False) if window_fn else None - filterbanks = torch.tensor( - librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq, - fmax=highfreq), dtype=torch.float).unsqueeze(0) - # self.fb = filterbanks - # self.window = window_tensor - self.register_buffer("fb", filterbanks) - self.register_buffer("window", window_tensor) - # Calculate maximum sequence length (# frames) - max_length = 1 + math.ceil( - (max_duration * sample_rate - self.win_length) / self.hop_length - ) - max_pad = 16 - (max_length % 16) - self.max_length = max_length + max_pad - - def get_seq_len(self, seq_len): - seq_len = (seq_len + self.hop_length - 1) // self.hop_length - seq_len = (seq_len + self.frame_splicing - 1) // self.frame_splicing - return seq_len - - @torch.no_grad() - def forward(self, inp: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor: - x, seq_len = inp - - dtype = x.dtype - - seq_len = self.get_seq_len(seq_len) - - # dither - if self.dither > 0 and not self.use_deterministic_dithering: - x += self.dither * torch.randn_like(x) - - # do preemphasis - # Ideally, we would mask immediately after this... Ugh :( - if self.preemph is not None: - x = torch.cat((x[:, 0].unsqueeze(1), x[:, 1:] - self.preemph * x[:, :-1]), - dim=1) - - # do stft - x = torch.stft(x, n_fft=self.n_fft, hop_length=self.hop_length, - win_length=self.win_length, - center=True, window=self.window.to(dtype=torch.float), - return_complex=False - ) - # get power spectrum - x = x.pow(2).sum(-1) - - if self.dither > 0 and self.use_deterministic_dithering: - x = x + self.dither ** 2 - # dot with filterbank energies - x = torch.matmul(self.fb.to(x.dtype), x) - - # log features if required - if self.log: - x = torch.log(x + 1e-20) - - # frame splicing if required - if self.frame_splicing > 1: - seq = [x] - for n in range(1, self.frame_splicing): - tmp = torch.zeros_like(x) - tmp[:, :, :-n] = x[:, :, n:] - seq.append(tmp) - x = torch.cat(seq, dim=1)[:, :, ::self.frame_splicing] - - # normalize if required - constant = 1e-5 - if self.normalize == "per_feature": - x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - x_std = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - for i in range(x.shape[0]): - x_mean[i, :] = x[i, :, :seq_len[i]].mean(dim=1) - x_std[i, :] = x[i, :, :seq_len[i]].std(dim=1) - # make sure x_std is not zero - x_std += constant - x = (x - x_mean.unsqueeze(2)) / x_std.unsqueeze(2) - elif self.normalize == "all_features": - x_mean = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - x_std = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - for i in range(x.shape[0]): - x_mean[i] = x[i, :, :seq_len[i].item()].mean() - x_std[i] = x[i, :, :seq_len[i].item()].std() - # make sure x_std is not zero - x_std += constant - x = (x - x_mean.view(-1, 1, 1)) / x_std.view(-1, 1, 1) - else: - x = x - - # Hmmm... They don't do any masking anymore. Seems concerning! 
- - # mask to zero any values beyond seq_len in batch, pad to multiple of `pad_to` (for efficiency) - # max_len = x.size(-1) - x = x[:, :, :seq_len.max()] # rnnt loss requires lengths to match - # mask = torch.arange(max_len).to(seq_len.dtype).to(x.device).expand(x.size(0), - # max_len) >= seq_len.unsqueeze(1) - - # x = x.masked_fill(mask.unsqueeze(1).to(device=x.device), 0) - pad_to = self.pad_to - if pad_to != 0: - raise NotImplementedError() - # if pad_to == "max": - # x = nn.functional.pad(x, (0, self.max_length - x.size(-1))) - # elif pad_to > 0: - # pad_amt = x.size(-1) % pad_to - # if pad_amt != 0: - # x = nn.functional.pad(x, (0, pad_to - pad_amt)) - - return x.to(dtype) - - @classmethod - def from_config(cls, cfg, log=False): - return cls(sample_rate=cfg['sample_rate'], window_size=cfg['window_size'], - window_stride=cfg['window_stride'], n_fft=cfg['n_fft'], - nfilt=cfg['features'], window=cfg['window'], - normalize=cfg['normalize'], - max_duration=cfg.get('max_duration', 16.7), - dither=cfg['dither'], pad_to=cfg.get("pad_to", 0), - frame_splicing=cfg.get("frame_splicing", 1), log=log) - - -class FeatureFactory(object): - featurizers = { - "logfbank": FilterbankFeatures, - "fbank": FilterbankFeatures, - } - - def __init__(self): - pass - - @classmethod - def from_config(cls, cfg): - feat_type = cfg.get('feat_type', "logspect") - featurizer = cls.featurizers[feat_type] - # return featurizer.from_config(cfg, log="log" in cfg['feat_type']) - return featurizer.from_config(cfg, log="log" in feat_type) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/manifest.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/manifest.py deleted file mode 100644 index fb04c5da882..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/manifest.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import string -import os - -from .text import _clean_text - - -def normalize_string(s, labels, table, **unused_kwargs): - """ - Normalizes string. For example: - 'call me at 8:00 pm!' -> 'call me at eight zero pm' - - Args: - s: string to normalize - labels: labels used during model training. 
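Frame splicing, as implemented twice above (in `splice_frames` and inline in `FilterbankFeatures.forward`), stacks shifted copies of the feature matrix along the feature axis and then keeps every `frame_splicing`-th frame. With the rnnt.toml settings this turns (B, 80, T) features into (B, 240, ceil(T / 3)):

```python
# splice_frames with frame_splicing=3: stack the signal with 1- and 2-frame
# left-shifted copies along the feature axis, then keep every 3rd frame.
import torch

x = torch.randn(1, 80, 9)
seq = [x]
for n in range(1, 3):
    tmp = torch.zeros_like(x)
    tmp[:, :, :-n] = x[:, :, n:]  # shift left by n frames, zero-pad the tail
    seq.append(tmp)
out = torch.cat(seq, dim=1)[:, :, ::3]
print(out.shape)  # torch.Size([1, 240, 3])
```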
- - Returns: - Normalized string - """ - - def good_token(token, labels): - s = set(labels) - for t in token: - if t not in s: - return False - return True - - try: - text = _clean_text(s, ["english_cleaners"], table).strip() - return ''.join([t for t in text if good_token(t, labels=labels)]) - except: - print("WARNING: Normalizing {} failed".format(s)) - return None - - -class Manifest(object): - def __init__(self, data_dir, manifest_paths, labels, blank_index, max_duration=None, pad_to_max=False, - min_duration=None, sort_by_duration=False, max_utts=0, - normalize=True, speed_perturbation=False, filter_speed=1.0): - self.labels_map = dict([(labels[i], i) for i in range(len(labels))]) - self.blank_index = blank_index - self.max_duration = max_duration - ids = [] - duration = 0.0 - filtered_duration = 0.0 - - # If removing punctuation, make a list of punctuation to remove - table = None - if normalize: - # Punctuation to remove - punctuation = string.punctuation - punctuation = punctuation.replace("+", "") - punctuation = punctuation.replace("&", "") - # We might also want to consider: - # @ -> at - # -> number, pound, hashtag - # ~ -> tilde - # _ -> underscore - # % -> percent - # If a punctuation symbol is inside our vocab, we do not remove from text - for l in labels: - punctuation = punctuation.replace(l, "") - # Turn all punctuation to whitespace - table = str.maketrans(punctuation, " " * len(punctuation)) - for manifest_path in manifest_paths: - with open(manifest_path, "r", encoding="utf-8") as fh: - a = json.load(fh) - for data in a: - files_and_speeds = data['files'] - - if pad_to_max: - if not speed_perturbation: - min_speed = filter_speed - else: - min_speed = min(x['speed'] - for x in files_and_speeds) - max_duration = self.max_duration * min_speed - - data['duration'] = data['original_duration'] - if min_duration is not None and data['duration'] < min_duration: - filtered_duration += data['duration'] - continue - if max_duration is not None and data['duration'] > max_duration: - filtered_duration += data['duration'] - continue - - # Prune and normalize according to transcript - transcript_text = data[ - 'transcript'] if "transcript" in data else self.load_transcript( - data['text_filepath']) - if normalize: - transcript_text = normalize_string(transcript_text, labels=labels, - table=table) - if not isinstance(transcript_text, str): - print( - "WARNING: Got transcript: {}. It is not a string. 
Dropping data point".format( - transcript_text)) - filtered_duration += data['duration'] - continue - data["transcript"] = self.parse_transcript( - transcript_text) # convert to vocab indices - - if speed_perturbation: - audio_paths = [x['fname'] for x in files_and_speeds] - data['audio_duration'] = [x['duration'] - for x in files_and_speeds] - else: - audio_paths = [ - x['fname'] for x in files_and_speeds if x['speed'] == filter_speed] - data['audio_duration'] = [x['duration'] - for x in files_and_speeds if x['speed'] == filter_speed] - data['audio_filepath'] = [os.path.join( - data_dir, x) for x in audio_paths] - data.pop('files') - data.pop('original_duration') - - ids.append(data) - duration += data['duration'] - - if max_utts > 0 and len(ids) >= max_utts: - print( - 'Stopping parsing %s as max_utts=%d' % (manifest_path, max_utts)) - break - - if sort_by_duration: - ids = sorted(ids, key=lambda x: x['duration']) - self._data = ids - self._size = len(ids) - self._duration = duration - self._filtered_duration = filtered_duration - - def load_transcript(self, transcript_path): - with open(transcript_path, 'r', encoding="utf-8") as transcript_file: - transcript = transcript_file.read().replace('\n', '') - return transcript - - def parse_transcript(self, transcript): - chars = [self.labels_map.get(x, self.blank_index) - for x in list(transcript)] - transcript = list(filter(lambda x: x != self.blank_index, chars)) - return transcript - - def __getitem__(self, item): - return self._data[item] - - def __len__(self): - return self._size - - def __iter__(self): - return iter(self._data) - - @property - def duration(self): - return self._duration - - @property - def filtered_duration(self): - return self._filtered_duration - - @property - def data(self): - return list(self._data) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/segment.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/segment.py deleted file mode 100644 index 08aa5c6a492..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/segment.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import librosa -import soundfile as sf - - -class AudioSegment(object): - """Monaural audio segment abstraction. - :param samples: Audio samples [num_samples x num_channels]. - :type samples: ndarray.float32 - :param sample_rate: Audio sample rate. - :type sample_rate: int - :raises TypeError: If the sample data type is not float or int. - """ - - def __init__(self, samples, sample_rate, target_sr=None, trim=False, - trim_db=60): - """Create audio segment from samples. - Samples are convert float32 internally, with int scaled to [-1, 1]. 
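The normalization block in `Manifest.__init__` above builds a translation table that maps punctuation to whitespace while sparing `+`, `&`, and any symbol already in the vocabulary (here the apostrophe). The same construction in isolation, with the label set spelled out for the example:

```python
# The punctuation table built in Manifest.__init__: everything in
# string.punctuation except '+', '&', and vocab symbols becomes whitespace.
import string

labels = [" ", "'"] + [chr(c) for c in range(ord('a'), ord('z') + 1)]
punctuation = string.punctuation.replace("+", "").replace("&", "")
for l in labels:
    punctuation = punctuation.replace(l, "")
table = str.maketrans(punctuation, " " * len(punctuation))

print("don't stop, ok?".translate(table))  # "don't stop  ok " -- the
                                           # in-vocab apostrophe survives
```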
- """ - samples = self._convert_samples_to_float32(samples) - if target_sr is not None and target_sr != sample_rate: - samples = librosa.core.resample(samples, sample_rate, target_sr) - sample_rate = target_sr - if trim: - samples, _ = librosa.effects.trim(samples, trim_db) - self._samples = samples - self._sample_rate = sample_rate - if self._samples.ndim >= 2: - self._samples = np.mean(self._samples, 1) - - def __eq__(self, other): - """Return whether two objects are equal.""" - if type(other) is not type(self): - return False - if self._sample_rate != other._sample_rate: - return False - if self._samples.shape != other._samples.shape: - return False - if np.any(self.samples != other._samples): - return False - return True - - def __ne__(self, other): - """Return whether two objects are unequal.""" - return not self.__eq__(other) - - def __str__(self): - """Return human-readable representation of segment.""" - return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, " - "rms=%.2fdB" % (type(self), self.num_samples, self.sample_rate, - self.duration, self.rms_db)) - - @staticmethod - def _convert_samples_to_float32(samples): - """Convert sample type to float32. - Audio sample type is usually integer or float-point. - Integers will be scaled to [-1, 1] in float32. - """ - float32_samples = samples.astype('float32') - if samples.dtype in np.sctypes['int']: - bits = np.iinfo(samples.dtype).bits - float32_samples *= (1. / 2 ** (bits - 1)) - elif samples.dtype in np.sctypes['float']: - pass - else: - raise TypeError("Unsupported sample type: %s." % samples.dtype) - return float32_samples - - @classmethod - def from_file(cls, filename, target_sr=None, int_values=False, offset=0, - duration=0, trim=False): - """ - Load a file supported by librosa and return as an AudioSegment. - :param filename: path of file to load - :param target_sr: the desired sample rate - :param int_values: if true, load samples as 32-bit integers - :param offset: offset in seconds when loading audio - :param duration: duration in seconds when loading audio - :return: numpy array of samples - """ - with sf.SoundFile(filename, 'r') as f: - dtype = 'int32' if int_values else 'float32' - sample_rate = f.samplerate - if offset > 0: - f.seek(int(offset * sample_rate)) - if duration > 0: - samples = f.read(int(duration * sample_rate), dtype=dtype) - else: - samples = f.read(dtype=dtype) - samples = samples.transpose() - return cls(samples, sample_rate, target_sr=target_sr, trim=trim) - - @property - def samples(self): - return self._samples.copy() - - @property - def sample_rate(self): - return self._sample_rate - - @property - def num_samples(self): - return self._samples.shape[0] - - @property - def duration(self): - return self._samples.shape[0] / float(self._sample_rate) - - @property - def rms_db(self): - mean_square = np.mean(self._samples ** 2) - return 10 * np.log10(mean_square) - - def gain_db(self, gain): - self._samples *= 10. ** (gain / 20.) - - def pad(self, pad_size, symmetric=False): - """Add zero padding to the sample. The pad size is given in number of samples. - If symmetric=True, `pad_size` will be added to both sides. If false, `pad_size` - zeros will be added only to the end. - """ - self._samples = np.pad(self._samples, - (pad_size if symmetric else 0, pad_size), - mode='constant') - - def subsegment(self, start_time=None, end_time=None): - """Cut the AudioSegment between given boundaries. - Note that this is an in-place transformation. - :param start_time: Beginning of subsegment in seconds. 
- :type start_time: float - :param end_time: End of subsegment in seconds. - :type end_time: float - :raise ValueError: If start_time or end_time is incorrectly set, e.g. out - of bounds in time. - """ - start_time = 0.0 if start_time is None else start_time - end_time = self.duration if end_time is None else end_time - if start_time < 0.0: - start_time = self.duration + start_time - if end_time < 0.0: - end_time = self.duration + end_time - if start_time < 0.0: - raise ValueError("The slice start position (%f s) is out of " - "bounds." % start_time) - if end_time < 0.0: - raise ValueError("The slice end position (%f s) is out of bounds." % - end_time) - if start_time > end_time: - raise ValueError("The slice start position (%f s) is later than " - "the end position (%f s)." % (start_time, end_time)) - if end_time > self.duration: - raise ValueError("The slice end position (%f s) is out of bounds " - "(> %f s)" % (end_time, self.duration)) - start_sample = int(round(start_time * self._sample_rate)) - end_sample = int(round(end_time * self._sample_rate)) - self._samples = self._samples[start_sample:end_sample] diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/LICENSE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/LICENSE deleted file mode 100644 index 4ad4ed1d5e3..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2017 Keith Ito - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/__init__.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/__init__.py deleted file mode 100644 index 61936879a95..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2017 Keith Ito -""" from https://github.com/keithito/tacotron """ -from . 
import cleaners - - -def _clean_text(text, cleaner_names, *args): - for name in cleaner_names: - cleaner = getattr(cleaners, name) - if not cleaner: - raise Exception('Unknown cleaner: %s' % name) - text = cleaner(text, *args) - return text diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/cleaners.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/cleaners.py deleted file mode 100644 index e1e52af5f37..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/cleaners.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2017 Keith Ito -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" from https://github.com/keithito/tacotron -Modified to add punctuation removal -""" - -''' -Cleaners are transformations that run over the input text at both training and eval time. - -Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners" -hyperparameter. Some cleaners are English-specific. You'll typically want to use: - 1. "english_cleaners" for English text - 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using - the Unidecode library (https://pypi.python.org/pypi/Unidecode) - 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update - the symbols in symbols.py to match your data). - -''' - - -# Regular expression matching whitespace: -import re -from text_unidecode import unidecode -from .numbers import normalize_numbers -_whitespace_re = re.compile(r'\s+') - -# List of (regular expression, replacement) pairs for abbreviations: -_abbreviations = [(re.compile('\\b%s\\.'
% x[0], re.IGNORECASE), x[1]) for x in [ - ('mrs', 'misess'), - ('mr', 'mister'), - ('dr', 'doctor'), - ('st', 'saint'), - ('co', 'company'), - ('jr', 'junior'), - ('maj', 'major'), - ('gen', 'general'), - ('drs', 'doctors'), - ('rev', 'reverend'), - ('lt', 'lieutenant'), - ('hon', 'honorable'), - ('sgt', 'sergeant'), - ('capt', 'captain'), - ('esq', 'esquire'), - ('ltd', 'limited'), - ('col', 'colonel'), - ('ft', 'fort'), -]] - - -def expand_abbreviations(text): - for regex, replacement in _abbreviations: - text = re.sub(regex, replacement, text) - return text - - -def expand_numbers(text): - return normalize_numbers(text) - - -def lowercase(text): - return text.lower() - - -def collapse_whitespace(text): - return re.sub(_whitespace_re, ' ', text) - - -def convert_to_ascii(text): - return unidecode(text) - - -def remove_punctuation(text, table): - text = text.translate(table) - text = re.sub(r'&', " and ", text) - text = re.sub(r'\+', " plus ", text) - return text - - -def basic_cleaners(text): - '''Basic pipeline that lowercases and collapses whitespace without transliteration.''' - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def transliteration_cleaners(text): - '''Pipeline for non-English text that transliterates to ASCII.''' - text = convert_to_ascii(text) - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def english_cleaners(text, table=None): - '''Pipeline for English text, including number and abbreviation expansion.''' - text = convert_to_ascii(text) - text = lowercase(text) - text = expand_numbers(text) - text = expand_abbreviations(text) - if table is not None: - text = remove_punctuation(text, table) - text = collapse_whitespace(text) - return text diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/numbers.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/numbers.py deleted file mode 100644 index d4b2f0d749f..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/parts/text/numbers.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2017 Keith Ito -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
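Since the deleted cleaners above are plain functions composed in sequence, the pipeline can be exercised standalone; a minimal sketch with a single abbreviation (the deleted file carries the full table):

    import re

    _whitespace_re = re.compile(r'\s+')
    _mr_re = re.compile(r'\bmr\.', re.IGNORECASE)  # one entry of the abbreviation table

    def english_cleaners_sketch(text):
        text = text.lower()                       # lowercase()
        text = re.sub(_mr_re, 'mister', text)     # expand_abbreviations()
        return re.sub(_whitespace_re, ' ', text)  # collapse_whitespace()

    print(english_cleaners_sketch("Mr.  Smith"))  # "mister smith"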
-""" from https://github.com/keithito/tacotron -Modified to add support for time and slight tweaks to _expand_number -""" - -import inflect -import re - - -_inflect = inflect.engine() -_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])') -_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)') -_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)') -_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)') -_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)') -_number_re = re.compile(r'[0-9]+') -_time_re = re.compile(r'([0-9]{1,2}):([0-9]{2})') - - -def _remove_commas(m): - return m.group(1).replace(',', '') - - -def _expand_decimal_point(m): - return m.group(1).replace('.', ' point ') - - -def _expand_dollars(m): - match = m.group(1) - parts = match.split('.') - if len(parts) > 2: - return match + ' dollars' # Unexpected format - dollars = int(parts[0]) if parts[0] else 0 - cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0 - if dollars and cents: - dollar_unit = 'dollar' if dollars == 1 else 'dollars' - cent_unit = 'cent' if cents == 1 else 'cents' - return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit) - elif dollars: - dollar_unit = 'dollar' if dollars == 1 else 'dollars' - return '%s %s' % (dollars, dollar_unit) - elif cents: - cent_unit = 'cent' if cents == 1 else 'cents' - return '%s %s' % (cents, cent_unit) - else: - return 'zero dollars' - - -def _expand_ordinal(m): - return _inflect.number_to_words(m.group(0)) - - -def _expand_number(m): - if int(m.group(0)[0]) == 0: - return _inflect.number_to_words(m.group(0), andword='', group=1) - num = int(m.group(0)) - if num > 1000 and num < 3000: - if num == 2000: - return 'two thousand' - elif num > 2000 and num < 2010: - return 'two thousand ' + _inflect.number_to_words(num % 100) - elif num % 100 == 0: - return _inflect.number_to_words(num // 100) + ' hundred' - else: - return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ') - # Add check for number phones and other large numbers - elif num > 1000000000 and num % 10000 != 0: - return _inflect.number_to_words(num, andword='', group=1) - else: - return _inflect.number_to_words(num, andword='') - - -def _expand_time(m): - mins = int(m.group(2)) - if mins == 0: - return _inflect.number_to_words(m.group(1)) - return " ".join([_inflect.number_to_words(m.group(1)), _inflect.number_to_words(m.group(2))]) - - -def normalize_numbers(text): - text = re.sub(_comma_number_re, _remove_commas, text) - text = re.sub(_pounds_re, r'\1 pounds', text) - text = re.sub(_dollars_re, _expand_dollars, text) - text = re.sub(_decimal_number_re, _expand_decimal_point, text) - text = re.sub(_ordinal_re, _expand_ordinal, text) - text = re.sub(_number_re, _expand_number, text) - text = re.sub(_time_re, _expand_time, text) - return text diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/preprocessing.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/preprocessing.py deleted file mode 100644 index 581885466b0..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/preprocessing.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Tuple - -import torch -import torch.nn as nn - -from helpers import Optimization -from parts.features import FeatureFactory - - -class AudioPreprocessing(nn.Module): - """GPU accelerated audio preprocessing - """ - - def __init__(self, **kwargs): - nn.Module.__init__(self) # For PyTorch API - self.optim_level = kwargs.get( - 'optimization_level', Optimization.nothing) - self.featurizer = FeatureFactory.from_config(kwargs) - - def forward(self, x: Tuple[torch.Tensor, torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]: - input_signal, length = x - length.requires_grad_(False) - processed_signal = self.featurizer(x) - processed_length = self.featurizer.get_seq_len(length) - return processed_signal, processed_length diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/rnn.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/rnn.py deleted file mode 100644 index 29198ebbef5..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/rnn.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - -from typing import Optional, Tuple - - -def rnn(rnn, input_size, hidden_size, num_layers, norm=None, - forget_gate_bias=1.0, dropout=0.0, **kwargs): - """TODO""" - if rnn != "lstm": - raise ValueError(f"Unknown rnn={rnn}") - if norm not in [None]: - raise ValueError(f"unknown norm={norm}") - - if rnn == "lstm": - return LstmDrop( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - forget_gate_bias=forget_gate_bias, - **kwargs - ) - - -class LstmDrop(torch.nn.Module): - - def __init__(self, input_size, hidden_size, num_layers, dropout, forget_gate_bias, - **kwargs): - """Returns an LSTM with forget gate bias init to `forget_gate_bias`. - - Args: - input_size: See `torch.nn.LSTM`. - hidden_size: See `torch.nn.LSTM`. - num_layers: See `torch.nn.LSTM`. - dropout: See `torch.nn.LSTM`. - forget_gate_bias: For each layer and each direction, the total value of - to initialise the forget gate bias to. - - Returns: - A `torch.nn.LSTM`. 
- """ - super(LstmDrop, self).__init__() - - self.lstm = torch.nn.LSTM( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - ) - if forget_gate_bias is not None: - for name, v in self.lstm.named_parameters(): - if "bias_ih" in name: - bias = getattr(self.lstm, name) - bias.data[hidden_size:2 * hidden_size].fill_(forget_gate_bias) - if "bias_hh" in name: - bias = getattr(self.lstm, name) - bias.data[hidden_size:2 * hidden_size].fill_(0) - - if dropout: - self.inplace_dropout = torch.nn.Dropout(dropout, inplace=True) - else: - self.inplace_dropout = None - - def forward(self, x: torch.Tensor, - h: Optional[Tuple[torch.Tensor, torch.Tensor]] = None): - if hasattr(self.lstm, "module"): - x, h = self.lstm.module(x, h) - else: - x, h = self.lstm(x, h) - - if self.inplace_dropout is not None: - self.inplace_dropout(x.data) - - return x, h - - -class StackTime(torch.nn.Module): - - __constants__ = ["factor"] - - def __init__(self, factor): - super().__init__() - self.factor = int(factor) - - def forward(self, x, x_lens): - # T, B, U - seq = [x] - for i in range(1, self.factor): - # This doesn't seem to make much sense... - tmp = torch.zeros_like(x) - tmp[:-i, :, :] = x[i:, :, :] - seq.append(tmp) - x_lens = torch.ceil(x_lens.float() / self.factor).int() - # Gross, this is horrible. What a waste of memory... - return torch.cat(seq, dim=2)[::self.factor, :, :], x_lens diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/docker/build.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/docker/build.sh deleted file mode 100644 index cfdc97c010e..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/docker/build.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -docker build . --rm -t jasper \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/docker/launch.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/docker/launch.sh deleted file mode 100644 index 5c9c6a3f346..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/docker/launch.sh +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash - -DATA_DIR=$1 -CHECKPOINT_DIR=$2 -RESULT_DIR=$3 - -docker run -it --rm \ - --gpus='"device=1"' \ - --shm-size=4g \ - --ulimit memlock=-1 \ - --ulimit stack=67108864 \ - -v "$DATA_DIR":/datasets \ - -v "$CHECKPOINT_DIR":/checkpoints/ \ - -v "$RESULT_DIR":/results/ \ - -v $PWD:/code \ - -v $PWD:/workspace/jasper \ - mlperf-rnnt-ref bash diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/download_librispeech.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/download_librispeech.sh deleted file mode 100644 index ee322fe3043..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/download_librispeech.sh +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -#!/usr/bin/env bash - -DATA_SET="LibriSpeech" -DATA_ROOT_DIR="/datasets" -DATA_DIR="${DATA_ROOT_DIR}/${DATA_SET}" -if [ ! -d "$DATA_DIR" ] -then - mkdir $DATA_DIR - chmod go+rx $DATA_DIR - python utils/download_librispeech.py utils/librispeech.csv $DATA_DIR -e ${DATA_ROOT_DIR}/ -else - echo "Directory $DATA_DIR already exists." -fi diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/evaluation.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/evaluation.sh deleted file mode 100644 index fcd472fd9aa..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/evaluation.sh +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
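For readers following along in Python rather than bash, the guard in the deleted download_librispeech.sh above amounts to this sketch (paths as in the deleted script; the chmod is omitted):

    import os
    import subprocess

    data_dir = "/datasets/LibriSpeech"
    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)
        subprocess.run(["python", "utils/download_librispeech.py",
                        "utils/librispeech.csv", data_dir,
                        "-e", "/datasets/"], check=True)
    else:
        print(f"Directory {data_dir} already exists.")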
- - -#!/bin/bash -echo "Container nvidia build = " $NVIDIA_BUILD_ID - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -DATASET=${2:-"dev-clean"} -MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${4:-"/results"} -CHECKPOINT=$5 -CREATE_LOGFILE=${6:-"true"} -CUDNN_BENCHMARK=${7:-"false"} -NUM_GPUS=${8:-1} -PRECISION=${9:-"fp32"} -NUM_STEPS=${10:-"-1"} -SEED=${11:-0} -BATCH_SIZE=${12:-64} - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS) - printf -v TAG "jasper_evaluation_${DATASET}_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" -fi - - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -STEPS="" -if [ "$NUM_STEPS" -gt 0 ] ; then - STEPS=" --steps $NUM_STEPS" -fi - -if [ "$CUDNN_BENCHMARK" = "true" ] ; then - CUDNN_BENCHMARK=" --cudnn_benchmark" -else - CUDNN_BENCHMARK="" -fi - - -CMD=" inference.py " -CMD+=" --batch_size $BATCH_SIZE " -CMD+=" --dataset_dir $DATA_DIR " -CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json " -CMD+=" --model_toml $MODEL_CONFIG " -CMD+=" --seed $SEED " -CMD+=" --ckpt $CHECKPOINT " -CMD+=" $CUDNN_BENCHMARK" -CMD+=" $PREC " -CMD+=" $STEPS " - - -if [ "$NUM_GPUS" -gt 1 ] ; then - CMD="python3 -m torch.distributed.launch --nproc_per_node=$NUM_GPUS $CMD" -else - CMD="python3 $CMD" -fi - - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee "$LOGFILE" -fi -set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/inference.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/inference.sh deleted file mode 100644 index 2d4474ce2b7..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/inference.sh +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
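The deleted evaluation.sh above builds one command string piece by piece and then prepends a launcher chosen by GPU count; the same control flow in Python, with hypothetical values:

    batch_size, num_gpus = 64, 2       # hypothetical values
    gbs = batch_size * num_gpus        # mirrors GBS=$(expr $BATCH_SIZE \* $NUM_GPUS)

    cmd = ["inference.py", "--batch_size", str(batch_size)]
    if num_gpus > 1:                   # multi-GPU -> distributed launcher
        cmd = ["python3", "-m", "torch.distributed.launch",
               f"--nproc_per_node={num_gpus}", *cmd]
    else:
        cmd = ["python3", *cmd]
    print(gbs, " ".join(cmd))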
- - -#!/bin/bash -echo "Container nvidia build = " $NVIDIA_BUILD_ID - - -DATA_DIR=${1-"/datasets/LibriSpeech"} -DATASET=${2:-"dev-clean"} -MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${4:-"/results"} -CHECKPOINT=$5 -CREATE_LOGFILE=${6:-"true"} -CUDNN_BENCHMARK=${7:-"false"} -PRECISION=${8:-"fp32"} -NUM_STEPS=${9:-"-1"} -SEED=${10:-0} -BATCH_SIZE=${11:-64} -MODELOUTPUT_FILE=${12:-"none"} -PREDICTION_FILE=${13:-"$RESULT_DIR/${DATASET}.predictions"} - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE) - printf -v TAG "jasper_inference_${DATASET}_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" -fi - - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -PRED="" -if [ "$PREDICTION_FILE" = "none" ] ; then - PRED="" -else - PRED=" --save_prediction $PREDICTION_FILE" -fi - -OUTPUT="" -if [ "$MODELOUTPUT_FILE" = "none" ] ; then - OUTPUT=" " -else - OUTPUT=" --logits_save_to $MODELOUTPUT_FILE" -fi - - -if [ "$CUDNN_BENCHMARK" = "true" ]; then - CUDNN_BENCHMARK=" --cudnn_benchmark" -else - CUDNN_BENCHMARK="" -fi - -STEPS="" -if [ "$NUM_STEPS" -gt 0 ] ; then - STEPS=" --steps $NUM_STEPS" -fi - -CMD=" python inference.py " -CMD+=" --batch_size $BATCH_SIZE " -CMD+=" --dataset_dir $DATA_DIR " -CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json " -CMD+=" --model_toml $MODEL_CONFIG " -CMD+=" --seed $SEED " -CMD+=" --ckpt $CHECKPOINT " -CMD+=" $CUDNN_BENCHMARK" -CMD+=" $PRED " -CMD+=" $OUTPUT " -CMD+=" $PREC " -CMD+=" $STEPS " - - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee "$LOGFILE" -fi -set +x -echo "MODELOUTPUT_FILE: ${MODELOUTPUT_FILE}" -echo "PREDICTION_FILE: ${PREDICTION_FILE}" diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/inference_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/inference_benchmark.sh deleted file mode 100644 index 7aeea84c159..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/inference_benchmark.sh +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash - -echo "Container nvidia build = " $NVIDIA_BUILD_ID - - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -DATASET=${2:-"dev-clean"} -MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${4:-"/results"} -CHECKPOINT=$5 -CREATE_LOGFILE=${6:-"true"} -CUDNN_BENCHMARK=${7:-"true"} -PRECISION=${8:-"fp32"} -NUM_STEPS=${9:-"-1"} -MAX_DURATION=${10:-"36"} -SEED=${11:-0} -BATCH_SIZE=${12:-64} - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi -STEPS="" -if [ "$NUM_STEPS" -gt 0 ] ; then - STEPS=" --steps $NUM_STEPS" -fi -if [ "$CUDNN_BENCHMARK" = "true" ] ; then - CUDNN_BENCHMARK=" --cudnn_benchmark" -else - CUDNN_BENCHMARK="" -fi - -CMD=" python inference_benchmark.py" -CMD+=" --batch_size=$BATCH_SIZE" -CMD+=" --model_toml=$MODEL_CONFIG" -CMD+=" --seed=$SEED" -CMD+=" --dataset_dir=$DATA_DIR" -CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json " -CMD+=" --ckpt=$CHECKPOINT" -CMD+=" --max_duration=$MAX_DURATION" -CMD+=" --pad_to=-1" -CMD+=" $CUDNN_BENCHMARK" -CMD+=" $PREC" -CMD+=" $STEPS" - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE ) - printf -v TAG "jasper_inference_benchmark_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" -fi - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee "$LOGFILE" - grep 'latency' "$LOGFILE" -fi -set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/preprocess_librispeech.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/preprocess_librispeech.sh deleted file mode 100644 index 7cfe5cc6a57..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/preprocess_librispeech.sh +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
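The benchmark scripts in this example post-process their own logs with grep/awk pipelines; the equivalent extraction in Python, assuming a hypothetical "Step time: X" log format like the one train_benchmark.sh (below) greps for:

    import re

    log_lines = ["Step time: 0.12", "Step time: 0.10", "Step time: 0.11"]
    latencies = [float(re.search(r'[0-9.]+', ln).group()) for ln in log_lines]
    print(f"mean_latency: {sum(latencies) / len(latencies):.2f} s")  # 0.11 s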
- -#!/usr/bin/env bash - -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/train-clean-100 \ - --dest_dir /datasets/LibriSpeech/train-clean-100-wav \ - --output_json /datasets/LibriSpeech/librispeech-train-clean-100-wav.json \ - --speed 0.9 1.1 -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/train-clean-360 \ - --dest_dir /datasets/LibriSpeech/train-clean-360-wav \ - --output_json /datasets/LibriSpeech/librispeech-train-clean-360-wav.json \ - --speed 0.9 1.1 -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/train-other-500 \ - --dest_dir /datasets/LibriSpeech/train-other-500-wav \ - --output_json /datasets/LibriSpeech/librispeech-train-other-500-wav.json \ - --speed 0.9 1.1 - - -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/dev-clean \ - --dest_dir /datasets/LibriSpeech/dev-clean-wav \ - --output_json /datasets/LibriSpeech/librispeech-dev-clean-wav.json -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/dev-other \ - --dest_dir /datasets/LibriSpeech/dev-other-wav \ - --output_json /datasets/LibriSpeech/librispeech-dev-other-wav.json - - -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/test-clean \ - --dest_dir /datasets/LibriSpeech/test-clean-wav \ - --output_json /datasets/LibriSpeech/librispeech-test-clean-wav.json -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/test-other \ - --dest_dir /datasets/LibriSpeech/test-other-wav \ - --output_json /datasets/LibriSpeech/librispeech-test-other-wav.json diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/train.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/train.sh deleted file mode 100644 index d59ce8ebeb2..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/train.sh +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
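The 0.9/1.1 flags above request speed-perturbed training copies; the actual conversion happens in the deleted preprocessing_utils.py (further down in this patch) via pysox. Reduced to its core:

    import sox  # pysox, as used by the deleted preprocessing_utils.py

    for s in (0.9, 1.0, 1.1):
        # One output per speed factor, resampled to the target rate.
        tfm = sox.Transformer().speed(factor=s).convert(16000)
        tfm.build("sample.flac", f"sample-{s}.wav")  # hypothetical filenames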
- - -#!/bin/bash -echo "Container nvidia build = " $NVIDIA_BUILD_ID - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -MODEL_CONFIG=${2:-"configs/rnnt.toml"} -RESULT_DIR=${3:-"/results"} -CHECKPOINT=${4:-"none"} -CREATE_LOGFILE=${5:-"true"} -CUDNN_BENCHMARK=${6:-"true"} -NUM_GPUS=${7:-8} -PRECISION=${8:-"fp16"} -EPOCHS=${9:-100} -SEED=${10:-6} -BATCH_SIZE=${11:-8} -EVAL_BATCH_SIZE=${11:-2} -LEARNING_RATE=${12:-"0.001"} -LEARNING_RATE_WARMUP=${12:-"8000"} -GRADIENT_ACCUMULATION_STEPS=${13:-1} -LAUNCH_OPT=${LAUNCH_OPT:-"none"} - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -CUDNN="" -if [ "$CUDNN_BENCHMARK" = "true" ] && [ "$PRECISION" = "fp16" ]; then - CUDNN=" --cudnn" -else - CUDNN="" -fi - - - -if [ "$CHECKPOINT" = "none" ] ; then - CHECKPOINT="" -else - CHECKPOINT=" --ckpt=${CHECKPOINT}" -fi - - -CMD=" train.py" -CMD+=" --batch_size=$BATCH_SIZE" -CMD+=" --eval_batch_size=$EVAL_BATCH_SIZE" -CMD+=" --num_epochs=$EPOCHS" -CMD+=" --output_dir=$RESULT_DIR" -CMD+=" --model_toml=$MODEL_CONFIG" -CMD+=" --lr=$LEARNING_RATE" -CMD+=" --lr_warmup=$LEARNING_RATE_WARMUP" -CMD+=" --seed=$SEED" -CMD+=" --optimizer=adam" -CMD+=" --dataset_dir=$DATA_DIR" -CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json" -CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,$DATA_DIR/librispeech-train-clean-360-wav.json,$DATA_DIR/librispeech-train-other-500-wav.json" -CMD+=" --weight_decay=1e-3" -CMD+=" --save_freq=100" -CMD+=" --eval_freq=1" -CMD+=" --train_freq=250" -CMD+=" --lr_decay" -CMD+=" --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS " -CMD+=" $CHECKPOINT" -CMD+=" $PREC" -CMD+=" $CUDNN" - - -if [ "${LAUNCH_OPT}" != "none" ]; then - CMD="python -m $LAUNCH_OPT $CMD" -elif [ "$NUM_GPUS" -gt 1 ] ; then - CMD="python3 -m multiproc --nproc_per_node=$NUM_GPUS $CMD" -else - CMD="python3 $CMD" -fi - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS) - printf -v TAG "rnnt_train_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE=$RESULT_DIR/$TAG.$DATESTAMP.log - printf "Logs written to %s\n" "$LOGFILE" -fi - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee $LOGFILE -fi -set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/train_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/train_benchmark.sh deleted file mode 100644 index 7b5a33705ca..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/scripts/train_benchmark.sh +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
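One caveat in the deleted train.sh above: BATCH_SIZE and EVAL_BATCH_SIZE both read ${11}, and LEARNING_RATE and LEARNING_RATE_WARMUP both read ${12}, so the later default in each pair is unreachable as a positional argument. A named-argument sketch sidesteps that class of bug; defaults taken from the deleted script:

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument("--batch_size", type=int, default=8)
    p.add_argument("--eval_batch_size", type=int, default=2)
    p.add_argument("--lr", type=float, default=0.001)
    p.add_argument("--lr_warmup", type=int, default=8000)
    args = p.parse_args([])  # empty argv -> pure defaults, for illustration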
- -#!/bin/bash - -echo "Container nvidia build = " $NVIDIA_BUILD_ID - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -MODEL_CONFIG=${2:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${3:-"/results"} -CREATE_LOGFILE=${4:-"true"} -CUDNN_BENCHMARK=${5:-"true"} -NUM_GPUS=${6:-8} -PRECISION=${7:-"fp16"} -NUM_STEPS=${8:-"-1"} -MAX_DURATION=${9:-16.7} -SEED=${10:-0} -BATCH_SIZE=${11:-64} -LEARNING_RATE=${12:-"0.015"} -GRADIENT_ACCUMULATION_STEPS=${13:-1} -PRINT_FREQUENCY=${14:-1} - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC=" --fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -STEPS="" -if [ "$NUM_STEPS" -ne "-1" ] ; then - STEPS=" --num_steps=$NUM_STEPS" -elif [ "$NUM_STEPS" = "-1" ] ; then - STEPS="" -else - echo "Unknown argument" - exit -2 -fi - -CUDNN="" -if [ "$CUDNN_BENCHMARK" = "true" ] ; then - CUDNN=" --cudnn" -else - CUDNN="" -fi - - -CMD=" train.py" -CMD+=" --batch_size=$BATCH_SIZE" -CMD+=" --num_epochs=400" -CMD+=" --output_dir=$RESULT_DIR" -CMD+=" --model_toml=$MODEL_CONFIG" -CMD+=" --lr=$LEARNING_RATE" -CMD+=" --seed=$SEED" -CMD+=" --optimizer=novograd" -CMD+=" --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS" -CMD+=" --dataset_dir=$DATA_DIR" -CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json" -CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,$DATA_DIR/librispeech-train-clean-360-wav.json,$DATA_DIR/librispeech-train-other-500-wav.json" -CMD+=" --weight_decay=1e-3" -CMD+=" --save_freq=100000" -CMD+=" --eval_freq=100000" -CMD+=" --max_duration=$MAX_DURATION" -CMD+=" --pad_to_max" -CMD+=" --train_freq=$PRINT_FREQUENCY" -CMD+=" --lr_decay" -CMD+=" $CUDNN" -CMD+=" $PREC" -CMD+=" $STEPS" - -if [ "$NUM_GPUS" -gt 1 ] ; then - CMD="python3 -m torch.distributed.launch --nproc_per_node=$NUM_GPUS $CMD" -else - CMD="python3 $CMD" -fi - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS) - printf -v TAG "jasper_train_benchmark_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" - -fi - -if [ -z "$LOGFILE" ] ; then - - set -x - $CMD - set +x -else - - set -x - ( - $CMD - ) |& tee "$LOGFILE" - - set +x - - mean_latency=`cat "$LOGFILE" | grep 'Step time' | awk '{print $3}' | tail -n +2 | egrep -o '[0-9.]+'| awk 'BEGIN {total=0} {total+=$1} END {printf("%.2f\n",total/NR)}'` - mean_throughput=`python -c "print($BATCH_SIZE*$NUM_GPUS/${mean_latency})"` - training_wer_per_pgu=`cat "$LOGFILE" | grep 'training_batch_WER'| awk '{print $2}' | tail -n 1 | egrep -o '[0-9.]+'` - training_loss_per_pgu=`cat "$LOGFILE" | grep 'Loss@Step'| awk '{print $4}' | tail -n 1 | egrep -o '[0-9.]+'` - final_eval_wer=`cat "$LOGFILE" | grep 'Evaluation WER'| tail -n 1 | egrep -o '[0-9.]+'` - final_eval_loss=`cat "$LOGFILE" | grep 'Evaluation Loss'| tail -n 1 | egrep -o '[0-9.]+'` - - echo "max duration: $MAX_DURATION s" | tee -a "$LOGFILE" - echo "mean_latency: $mean_latency s" | tee -a "$LOGFILE" - echo "mean_throughput: $mean_throughput sequences/s" | tee -a "$LOGFILE" - echo "training_wer_per_pgu: $training_wer_per_pgu" | tee -a "$LOGFILE" - echo "training_loss_per_pgu: $training_loss_per_pgu" | tee -a "$LOGFILE" - echo "final_eval_loss: $final_eval_loss" | tee -a "$LOGFILE" - echo "final_eval_wer: $final_eval_wer" | tee -a "$LOGFILE" -fi diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/__init__.py 
b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/convert_librispeech.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/convert_librispeech.py deleted file mode 100644 index 4b7c84a4fe2..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/convert_librispeech.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import argparse -import os -import glob -import multiprocessing -import json - -import pandas as pd - -from preprocessing_utils import parallel_preprocess - -parser = argparse.ArgumentParser(description='Preprocess LibriSpeech.') -parser.add_argument('--input_dir', type=str, required=True, - help='LibriSpeech collection input dir') -parser.add_argument('--dest_dir', type=str, required=True, - help='Output dir') -parser.add_argument('--output_json', type=str, default='./', - help='name of the output json file.') -parser.add_argument('-s', '--speed', type=float, nargs='*', - help='Speed perturbation ratio') -parser.add_argument('--target_sr', type=int, default=None, - help='Target sample rate. ' - 'defaults to the input sample rate') -parser.add_argument('--overwrite', action='store_true', - help='Overwrite file if exists') -parser.add_argument('--parallel', type=int, default=multiprocessing.cpu_count(), - help='Number of threads to use when processing audio files') -args = parser.parse_args() - -args.input_dir = args.input_dir.rstrip('/') -args.dest_dir = args.dest_dir.rstrip('/') - - -def build_input_arr(input_dir): - txt_files = glob.glob(os.path.join(input_dir, '**', '*.trans.txt'), - recursive=True) - input_data = [] - for txt_file in txt_files: - rel_path = os.path.relpath(txt_file, input_dir) - with open(txt_file) as fp: - for line in fp: - fname, _, transcript = line.partition(' ') - input_data.append(dict(input_relpath=os.path.dirname(rel_path), - input_fname=fname + '.flac', - transcript=transcript)) - return input_data - - -print("[%s] Scanning input dir..." % args.output_json) -dataset = build_input_arr(input_dir=args.input_dir) - -print("[%s] Converting audio files..." % args.output_json) -dataset = parallel_preprocess(dataset=dataset, - input_dir=args.input_dir, - dest_dir=args.dest_dir, - target_sr=args.target_sr, - speed=args.speed, - overwrite=args.overwrite, - parallel=args.parallel) - -print("[%s] Generating json..." % args.output_json) -df = pd.DataFrame(dataset, dtype=object) - -# Save json with python. 
df.to_json() produces back slashed in file paths -dataset = df.to_dict(orient='records') -with open(args.output_json, 'w') as fp: - json.dump(dataset, fp, indent=2) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/download_librispeech.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/download_librispeech.py deleted file mode 100644 index 96cc277d76d..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/download_librispeech.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import os -import argparse -import pandas as pd - -from download_utils import download_file, md5_checksum, extract - -parser = argparse.ArgumentParser( - description='Download, verify and extract dataset files') -parser.add_argument('csv', type=str, - help='CSV file with urls and checksums to download.') -parser.add_argument('dest', type=str, - help='Download destination folder.') -parser.add_argument('-e', type=str, default=None, - help='Extraction destination folder. Defaults to download folder if not provided') -parser.add_argument('--skip_download', action='store_true', - help='Skip downloading the files') -parser.add_argument('--skip_checksum', action='store_true', - help='Skip checksum') -parser.add_argument('--skip_extract', action='store_true', - help='Skip extracting files') -args = parser.parse_args() -args.e = args.e or args.dest - - -df = pd.read_csv(args.csv, delimiter=',') - - -if not args.skip_download: - for url in df.url: - fname = url.split('/')[-1] - print("Downloading %s:" % fname) - download_file(url=url, dest_folder=args.dest, fname=fname) -else: - print("Skipping file download") - - -if not args.skip_checksum: - for index, row in df.iterrows(): - url = row['url'] - md5 = row['md5'] - fname = url.split('/')[-1] - fpath = os.path.join(args.dest, fname) - print("Verifying %s: " % fname, end='') - ret = md5_checksum(fpath=fpath, target_hash=md5) - if not ret: - raise ValueError(f"Checksum for {fname} failed!") - else: - print(f"Checksum correct for {fname}") -else: - print("Skipping checksum") - - -if not args.skip_extract: - for url in df.url: - fname = url.split('/')[-1] - fpath = os.path.join(args.dest, fname) - print("Decompressing %s:" % fpath) - extract(fpath=fpath, dest_folder=args.e) -else: - print("Skipping file extraction") diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/download_utils.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/download_utils.py deleted file mode 100644 index 6dbc3cf17a2..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/download_utils.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
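The checksum loop above is the heart of the deleted download_librispeech.py; together with md5_checksum from download_utils.py (the next file), it reduces to:

    import hashlib
    import os
    import pandas as pd

    def md5_checksum(fpath, target_hash):
        file_hash = hashlib.md5()
        with open(fpath, "rb") as fp:
            for chunk in iter(lambda: fp.read(1024 * 1024), b""):
                file_hash.update(chunk)
        return file_hash.hexdigest() == target_hash

    df = pd.read_csv("librispeech.csv")  # hypothetical CSV with url,md5 columns
    for _, row in df.iterrows():
        fname = row["url"].split("/")[-1]
        if not md5_checksum(os.path.join("/datasets", fname), row["md5"]):
            raise ValueError(f"Checksum for {fname} failed!")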
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import hashlib -import requests -import os -import tarfile -import tqdm - - -def download_file(url, dest_folder, fname, overwrite=False): - fpath = os.path.join(dest_folder, fname) - if os.path.isfile(fpath): - if overwrite: - print("Overwriting existing file") - else: - print("File exists, skipping download.") - return - - tmp_fpath = fpath + '.tmp' - - r = requests.get(url, stream=True) - file_size = int(r.headers['Content-Length']) - chunk_size = 1024 * 1024 # 1MB - total_chunks = int(file_size / chunk_size) - - with open(tmp_fpath, 'wb') as fp: - content_iterator = r.iter_content(chunk_size=chunk_size) - chunks = tqdm.tqdm(content_iterator, total=total_chunks, - unit='MB', desc=fpath, leave=True) - for chunk in chunks: - fp.write(chunk) - - os.rename(tmp_fpath, fpath) - - -def md5_checksum(fpath, target_hash): - file_hash = hashlib.md5() - with open(fpath, "rb") as fp: - for chunk in iter(lambda: fp.read(1024 * 1024), b""): - file_hash.update(chunk) - return file_hash.hexdigest() == target_hash - - -def extract(fpath, dest_folder): - if fpath.endswith('.tar.gz'): - mode = 'r:gz' - elif fpath.endswith('.tar'): - mode = 'r:' - else: - raise IOError('fpath has unknown extension: %s' % fpath) - - with tarfile.open(fpath, mode) as tar: - members = tar.getmembers() - for member in tqdm.tqdm(iterable=members, total=len(members), leave=True): - tar.extract(path=dest_folder, member=member) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/preprocessing_utils.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/preprocessing_utils.py deleted file mode 100644 index e32dfd8bc64..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch/utils/preprocessing_utils.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
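A detail worth keeping from the deleted download_file above: it streams into a ".tmp" sibling and renames only on success, so an interrupted download cannot masquerade as a finished file. The pattern in isolation:

    import os
    import requests

    def download_atomically(url, fpath):
        tmp_fpath = fpath + ".tmp"
        r = requests.get(url, stream=True)
        with open(tmp_fpath, "wb") as fp:
            for chunk in r.iter_content(chunk_size=1024 * 1024):  # 1 MB chunks
                fp.write(chunk)
        os.rename(tmp_fpath, fpath)  # atomic within a single filesystem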
- -import os -import multiprocessing -import functools - -import sox - - -from tqdm import tqdm - - -def preprocess(data, input_dir, dest_dir, target_sr=None, speed=None, - overwrite=True): - speed = speed or [] - speed.append(1) - speed = list(set(speed)) # Make unique - - input_fname = os.path.join(input_dir, - data['input_relpath'], - data['input_fname']) - input_sr = sox.file_info.sample_rate(input_fname) - target_sr = target_sr or input_sr - - os.makedirs(os.path.join(dest_dir, data['input_relpath']), exist_ok=True) - - output_dict = {} - output_dict['transcript'] = data['transcript'].lower().strip() - output_dict['files'] = [] - - fname = os.path.splitext(data['input_fname'])[0] - for s in speed: - output_fname = fname + \ - '{}.wav'.format('' if s == 1 else '-{}'.format(s)) - output_fpath = os.path.join(dest_dir, - data['input_relpath'], - output_fname) - - if not os.path.exists(output_fpath) or overwrite: - cbn = sox.Transformer().speed(factor=s).convert(target_sr) - cbn.build(input_fname, output_fpath) - - file_info = sox.file_info.info(output_fpath) - file_info['fname'] = os.path.join(os.path.basename(dest_dir), - data['input_relpath'], - output_fname) - file_info['speed'] = s - output_dict['files'].append(file_info) - - if s == 1: - file_info = sox.file_info.info(output_fpath) - output_dict['original_duration'] = file_info['duration'] - output_dict['original_num_samples'] = file_info['num_samples'] - - return output_dict - - -def parallel_preprocess(dataset, input_dir, dest_dir, target_sr, speed, overwrite, parallel): - with multiprocessing.Pool(parallel) as p: - func = functools.partial(preprocess, - input_dir=input_dir, dest_dir=dest_dir, - target_sr=target_sr, speed=speed, overwrite=overwrite) - dataset = list(tqdm(p.imap(func, dataset), total=len(dataset))) - return dataset diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch_SUT.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch_SUT.py deleted file mode 100644 index 615df2abbc5..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/pytorch_SUT.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) 2020, Cerebras Systems, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
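The deleted parallel_preprocess above is a reusable recipe: freeze the shared arguments with functools.partial, then fan the per-item work out over a multiprocessing.Pool. A self-contained sketch with a trivial stand-in worker:

    import functools
    import multiprocessing

    def work(item, scale):  # stand-in for the per-file preprocess()
        return item * scale

    def parallel_map(items, scale, workers=4):
        func = functools.partial(work, scale=scale)
        with multiprocessing.Pool(workers) as p:
            return list(p.imap(func, items))

    if __name__ == "__main__":
        print(parallel_map([1, 2, 3], scale=10))  # [10, 20, 30]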
- -import sys -import os -sys.path.insert(0, os.path.join(os.getcwd(), "pytorch")) - -import array -import torch -import numpy as np -import toml -import mlperf_loadgen as lg -from tqdm import tqdm - -from QSL import AudioQSL, AudioQSLInMemory -from decoders import ScriptGreedyDecoder -from helpers import add_blank_label -from preprocessing import AudioPreprocessing -from model_separable_rnnt import RNNT - - -def load_and_migrate_checkpoint(ckpt_path): - checkpoint = torch.load(ckpt_path, map_location="cpu") - migrated_state_dict = {} - for key, value in checkpoint['state_dict'].items(): - key = key.replace("joint_net", "joint.net") - migrated_state_dict[key] = value - del migrated_state_dict["audio_preprocessor.featurizer.fb"] - del migrated_state_dict["audio_preprocessor.featurizer.window"] - return migrated_state_dict - - -class PytorchSUT: - def __init__(self, config_toml, checkpoint_path, dataset_dir, - manifest_filepath, perf_count): - config = toml.load(config_toml) - - dataset_vocab = config['labels']['labels'] - rnnt_vocab = add_blank_label(dataset_vocab) - featurizer_config = config['input_eval'] - - self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries) - self.qsl = AudioQSLInMemory(dataset_dir, - manifest_filepath, - dataset_vocab, - featurizer_config["sample_rate"], - perf_count) - self.audio_preprocessor = AudioPreprocessing(**featurizer_config) - self.audio_preprocessor.eval() - self.audio_preprocessor = torch.jit.script(self.audio_preprocessor) - self.audio_preprocessor = torch.jit._recursive.wrap_cpp_module( - torch._C._freeze_module(self.audio_preprocessor._c)) - - model = RNNT( - feature_config=featurizer_config, - rnnt=config['rnnt'], - num_classes=len(rnnt_vocab) - ) - model.load_state_dict(load_and_migrate_checkpoint(checkpoint_path), - strict=True) - model.eval() - self.greedy_decoder = ScriptGreedyDecoder(len(rnnt_vocab) - 1, model) - - def issue_queries(self, query_samples): - for query_sample in query_samples: - waveform = self.qsl[query_sample.index] - assert waveform.ndim == 1 - waveform_length = np.array(waveform.shape[0], dtype=np.int64) - waveform = np.expand_dims(waveform, 0) - waveform_length = np.expand_dims(waveform_length, 0) - with torch.no_grad(): - waveform = torch.from_numpy(waveform) - waveform_length = torch.from_numpy(waveform_length) - feature, feature_length = self.audio_preprocessor.forward((waveform, waveform_length)) - assert feature.ndim == 3 - assert feature_length.ndim == 1 - feature = feature.permute(2, 0, 1) - - _, _, transcript = self.greedy_decoder.forward(feature, feature_length) - - assert len(transcript) == 1 - response_array = array.array('q', transcript[0]) - bi = response_array.buffer_info() - response = lg.QuerySampleResponse(query_sample.id, bi[0], - bi[1] * response_array.itemsize) - lg.QuerySamplesComplete([response]) - - def flush_queries(self): - pass - - def __del__(self): - lg.DestroySUT(self.sut) - print("Finished destroying SUT.") \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/requirements.txt b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/requirements.txt deleted file mode 100644 index 61b7304a20c..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -neural-compressor -sox -absl-py -toml -text-unidecode -inflect -librosa -torch <= 1.13.1 -tqdm -numpy <= 1.24.4 \ No newline at end of file diff --git 
a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run.sh deleted file mode 100644 index 7538df99bdb..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -work_dir=/export/b07/ws15dgalvez/mlperf-rnnt-librispeech -local_data_dir=$work_dir/local_data -librispeech_download_dir=$local_data_dir/LibriSpeech -stage=3 - -mkdir -p $work_dir $local_data_dir $librispeech_download_dir - -install_dir=third_party/install -mkdir -p $install_dir -install_dir=$(readlink -f $install_dir) - -set +u -source "$($CONDA_EXE info --base)/etc/profile.d/conda.sh" -set -u - -# stage -1: install dependencies -if [[ $stage -le -1 ]]; then - conda env create --force -v --file environment.yml - - set +u - source "$(conda info --base)/etc/profile.d/conda.sh" - conda activate mlperf-rnnt - set -u - - # We need to convert .flac files to .wav files via sox. Not all sox installs have flac support, so we install from source. - wget https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz -O third_party/flac-1.3.2.tar.xz - (cd third_party; tar xf flac-1.3.2.tar.xz; cd flac-1.3.2; ./configure --prefix=$install_dir && make && make install) - - wget https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz -O third_party/sox-14.4.2.tar.gz - (cd third_party; tar zxf sox-14.4.2.tar.gz; cd sox-14.4.2; LDFLAGS="-L${install_dir}/lib" CFLAGS="-I${install_dir}/include" ./configure --prefix=$install_dir --with-flac && make && make install) - - (cd $(git rev-parse --show-toplevel)/loadgen; python setup.py install) -fi - -export PATH="$install_dir/bin/:$PATH" - -set +u -conda activate mlperf-rnnt -set -u - -# stage 0: download model. Check checksum to skip? -if [[ $stage -le 0 ]]; then - wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O $work_dir/rnnt.pt -fi - -# stage 1: download data. This will have a non-zero exit code if the -# checksum is incorrect. -if [[ $stage -le 1 ]]; then - python pytorch/utils/download_librispeech.py \ - pytorch/utils/librispeech-inference.csv \ - $librispeech_download_dir \ - -e $local_data_dir -fi - -if [[ $stage -le 2 ]]; then - python pytorch/utils/convert_librispeech.py \ - --input_dir $librispeech_download_dir/dev-clean \ - --dest_dir $local_data_dir/dev-clean-wav \ - --output_json $local_data_dir/dev-clean-wav.json -fi - -if [[ $stage -le 3 ]]; then - for backend in pytorch; do - for accuracy in "--accuracy" ""; do - for scenario in SingleStream Offline Server; do - log_dir=${work_dir}/${scenario}_${backend} - if [ !
-                if [ ! -z ${accuracy} ]; then
-                    log_dir+=_accuracy
-                fi
-                log_dir+=rerun
-
-                python run.py --backend pytorch \
-                    --dataset_dir $local_data_dir \
-                    --manifest $local_data_dir/dev-clean-wav.json \
-                    --pytorch_config_toml pytorch/configs/rnnt.toml \
-                    --pytorch_checkpoint $work_dir/rnnt.pt \
-                    --scenario ${scenario} \
-                    --backend ${backend} \
-                    --log_dir ${log_dir} \
-                    ${accuracy} &
-
-            done
-        done
-    done
-    wait
-fi
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_benchmark.sh
deleted file mode 100644
index 02c20b21e8e..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_benchmark.sh
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/bin/bash
-set -x
-
-function main {
-
-    init_params "$@"
-    run_benchmark
-
-}
-
-# init params
-function init_params {
-    tuned_checkpoint=saved_results
-    for var in "$@"
-    do
-        case $var in
-            --topology=*)
-                topology=$(echo $var |cut -f2 -d=)
-            ;;
-            --dataset_location=*)
-                dataset_location=$(echo $var |cut -f2 -d=)
-            ;;
-            --input_model=*)
-                input_model=$(echo $var |cut -f2 -d=)
-            ;;
-            --mode=*)
-                mode=$(echo $var |cut -f2 -d=)
-            ;;
-            --batch_size=*)
-                batch_size=$(echo $var |cut -f2 -d=)
-            ;;
-            --iters=*)
-                iters=$(echo ${var} |cut -f2 -d=)
-            ;;
-            --int8=*)
-                int8=$(echo ${var} |cut -f2 -d=)
-            ;;
-            --config=*)
-                tuned_checkpoint=$(echo $var |cut -f2 -d=)
-            ;;
-            *)
-                echo "Error: No such parameter: ${var}"
-                exit 1
-            ;;
-        esac
-    done
-}
-
-# run_benchmark
-function run_benchmark {
-
-    if [[ ${mode} == "accuracy" ]]; then
-        mode_cmd="--accuracy "
-    elif [[ ${mode} == "performance" ]]; then
-        mode_cmd="--performance "
-    else
-        echo "Error: No such mode: ${mode}"
-        exit 1
-    fi
-
-    extra_cmd=""
-    if [ -n "$dataset_location" ];then
-        extra_cmd=$extra_cmd"--dataset_dir ${dataset_location} "
-    fi
-    if [ -n "$input_model" ];then
-        extra_cmd=$extra_cmd"--pytorch_checkpoint ${input_model} "
-    fi
-    if [ -n "$tuned_checkpoint" ];then
-        extra_cmd=$extra_cmd"--tuned_checkpoint ${tuned_checkpoint} "
-    fi
-    if [[ ${int8} == "true" ]]; then
-        extra_cmd=$extra_cmd"--int8"
-    fi
-
-    python run_tune.py \
-        --backend pytorch \
-        --manifest $dataset_location/dev-clean-wav.json \
-        --pytorch_config_toml pytorch/configs/rnnt.toml \
-        --scenario SingleStream \
-        ${mode_cmd} \
-        ${extra_cmd}
-}
-
-main "$@"
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_quant.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_quant.sh
deleted file mode 100644
index 63f0b6d9231..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_quant.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-set -x
-
-function main {
-
-    init_params "$@"
-    run_tuning
-
-}
-
-# init params
-function init_params {
-    tuned_checkpoint=saved_results
-    for var in "$@"
-    do
-        case $var in
-            --topology=*)
-                topology=$(echo $var |cut -f2 -d=)
-            ;;
-            --dataset_location=*)
-                dataset_location=$(echo $var |cut -f2 -d=)
-            ;;
-            --input_model=*)
-                input_model=$(echo $var |cut -f2 -d=)
-            ;;
-            --output_model=*)
-                tuned_checkpoint=$(echo $var |cut -f2 -d=)
-            ;;
-            *)
-                echo "Error: No such parameter: ${var}"
-                exit 1
-            ;;
-        esac
-    done
-}
-
-# run_tuning
-function run_tuning {
-    extra_cmd=""
-    if [ -n "$dataset_location" ];then
-        extra_cmd=$extra_cmd"--dataset_dir ${dataset_location} "
-    fi
-    if [ -n "$input_model" ];then
-        extra_cmd=$extra_cmd"--pytorch_checkpoint ${input_model} "
-    fi
-    if [ -n "$tuned_checkpoint" ];then
-        extra_cmd=$extra_cmd"--tuned_checkpoint ${tuned_checkpoint} "
-    fi
-
-    python run_tune.py \
-        --tune \
-        --backend pytorch \
-        --manifest $dataset_location/dev-clean-wav.json \
-        --pytorch_config_toml pytorch/configs/rnnt.toml \
-        --scenario Offline \
-        ${extra_cmd}
-}
-
-main "$@"
\ No newline at end of file
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_tune.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_tune.py
deleted file mode 100644
index 4400ffa179d..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/run_tune.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright 2020 The MLPerf Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-import argparse
-import mlperf_loadgen as lg
-import subprocess
-
-import time
-import os
-from pathlib import Path
-import re
-
-MLPERF_CONF = Path(os.path.dirname(os.path.realpath(__file__))) / "./mlperf.conf"
-MLPERF_CONF = MLPERF_CONF.resolve()
-
-
-def get_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--tune', dest='tune', action='store_true',
-                        help='tune best int8 model on calibration dataset')
-    parser.add_argument("--backend", choices=["pytorch"], default="pytorch", help="Backend")
-    parser.add_argument("--scenario", choices=["SingleStream", "Offline", "Server"],
-                        default="Offline", help="Scenario")
-    parser.add_argument("--mlperf_conf", default=str(MLPERF_CONF), help="mlperf rules config")
-    parser.add_argument("--user_conf", default="user.conf",
-                        help="user config for user LoadGen settings such as target QPS")
-    parser.add_argument("--pytorch_config_toml", default="pytorch/configs/rnnt.toml")
-    parser.add_argument("--pytorch_checkpoint", default="pytorch/work_dir/rnnt.pt")
-    parser.add_argument("--dataset_dir", required=True)
-    parser.add_argument("--manifest", required=True)
-    parser.add_argument("--perf_count", type=int, default=None)
-    parser.add_argument("--log_dir", default='./saved_log')
-    parser.add_argument('--performance', dest='performance', action='store_true',
-                        help='run benchmark')
-    parser.add_argument("--accuracy", dest='accuracy', action='store_true',
-                        help='For accuracy measurement only.')
-    parser.add_argument('--int8', dest='int8', action='store_true', help='load int8 model')
-    parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH',
-                        help='path to checkpoint tuned by Neural Compressor (default: ./)')
-    args = parser.parse_args()
-    return args
-
-
-scenario_map = {
-    "SingleStream": lg.TestScenario.SingleStream,
-    "Offline": lg.TestScenario.Offline,
-    "Server": lg.TestScenario.Server,
-}
-
-
-def main():
-    args = get_args()
-
-    if args.backend == "pytorch":
-        from pytorch_SUT import PytorchSUT
-        sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
-                         args.dataset_dir, args.manifest, args.perf_count)
-        model = sut.greedy_decoder._model
-    else:
-        raise ValueError("Unknown backend: {:}".format(args.backend))
-
-    settings = lg.TestSettings()
-    settings.scenario = scenario_map[args.scenario]
-    settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario)
-    settings.FromConfig(args.user_conf, "rnnt", args.scenario)
-
-    if args.performance:
-        settings.mode = lg.TestMode.PerformanceOnly
-    else:
-        settings.mode = lg.TestMode.AccuracyOnly
-
-    log_path = args.log_dir
-    os.makedirs(log_path, exist_ok=True)
-    log_output_settings = lg.LogOutputSettings()
-    log_output_settings.outdir = log_path
-    log_output_settings.copy_summary_to_stdout = True
-    log_settings = lg.LogSettings()
-    log_settings.log_output = log_output_settings
-
-    pattern = ['accuracy=\d+.\d+', 'samples_per_query : \d+', 'Mean latency.*']
-
-    def eval_func(model):
-        print("Running Loadgen test...")
-        sut.greedy_decoder._model = model
-        lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)
-        cmd = f"python3 accuracy_eval.py --log_dir {log_path} \
-            --dataset_dir {args.dataset_dir} --manifest {args.manifest}"
-        out = subprocess.check_output(cmd, shell=True)
-        out = out.decode()
-        regex_accu = re.compile(pattern[0])
-        accu = float(regex_accu.findall(out)[0].split('=')[1])
-        print('Accuracy: %.3f ' % (accu))
-        return accu
-
-    def benchmark(model):
-        print("Running Loadgen test...")
-        sut.greedy_decoder._model = model
-        lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)
-        file_path = os.path.join(log_path, 'mlperf_log_summary.txt')
-        f = open(file_path, 'r', encoding='UTF-8')
-        file_content = f.read()
-        f.close()
-        regex_batch = re.compile(pattern[1])
-        regex_late = re.compile(pattern[2])
-        samples_per_query = int(regex_batch.findall(file_content)[0].split(': ')[1])
-        latency_per_sample = int(regex_late.findall(file_content)[0].split(': ')[1])
-        print('Batch size = %d' % samples_per_query)
-        print('Latency: %.3f ms' % (latency_per_sample / 10**6))
-        print('Throughput: %.3f samples/sec' % (10**9/latency_per_sample))
-
-    if args.tune:
-        from neural_compressor import PostTrainingQuantConfig
-        from neural_compressor import quantization
-        conf = PostTrainingQuantConfig(approach="dynamic")
-        q_model = quantization.fit(model,
-                                   conf,
-                                   eval_func=eval_func)
-        q_model.save(args.tuned_checkpoint)
-        return
-
-    elif args.int8:
-        from neural_compressor.utils.pytorch import load
-        int8_model = load(os.path.abspath(os.path.expanduser(args.tuned_checkpoint)), model)
-        if args.accuracy:
-            eval_func(int8_model)
-        elif args.performance:
-            benchmark(int8_model)
-    else:
-        if args.accuracy:
-            eval_func(model)
-        elif args.performance:
-            benchmark(model)
-
-
-    print("Done!", flush=True)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/user.conf b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/user.conf
deleted file mode 100644
index 38057aaeade..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/user.conf
+++ /dev/null
@@ -1,6 +0,0 @@
-# Please set these fields depending on the performance of your system to
-# override default LoadGen settings.
-*.SingleStream.target_latency = 10
-*.MultiStream.target_latency = 80
-*.Server.target_qps = 1.0
-*.Offline.target_qps = 1.0
\ No newline at end of file
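At the core of the deleted run_tune.py is Neural Compressor's dynamic post-training quantization: `PostTrainingQuantConfig(approach="dynamic")` plus `quantization.fit` with an accuracy-returning `eval_func`. A self-contained sketch of those same calls on a toy model, with a placeholder `eval_func` standing in for the full LoadGen accuracy pass the original runs:

```python
# Minimal sketch of the dynamic PTQ call used by run_tune.py above, applied to
# a toy model instead of the RNN-T pipeline. The eval_func is a placeholder.
import torch
from neural_compressor import PostTrainingQuantConfig, quantization

# Toy stand-in for the real model; dynamic PTQ targets modules like nn.Linear.
model = torch.nn.Sequential(
    torch.nn.Linear(80, 320),
    torch.nn.ReLU(),
    torch.nn.Linear(320, 29),
).eval()

def eval_func(m):
    # Placeholder metric so tuning can terminate; run_tune.py instead runs a
    # LoadGen accuracy test and parses the reported accuracy from its output.
    return 1.0

conf = PostTrainingQuantConfig(approach="dynamic")  # dynamic PTQ, as above
q_model = quantization.fit(model, conf, eval_func=eval_func)
q_model.save("./saved_results")
```

The tuned model is later restored with `neural_compressor.utils.pytorch.load`, exactly as the `--int8` branch of run_tune.py does.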