Commit 2525056: auto-generating sphinx docs
pytorchbot committed Feb 14, 2025 (1 parent: 539f1e9)

Showing 23 changed files with 330 additions and 276 deletions.
3 binary files changed (not shown).
1 change: 1 addition & 0 deletions in main/_modules/torchao/dtypes/floatx/float8_layout.html

@@ -662,6 +662,7 @@ Source code for torchao.dtypes.floatx.float8_layout
 ):
     """Implements matmul between FP8 input and FP8 weight with compute using _scaled_mm"""
     scaled_mm_config = weight_tensor._layout.mm_config
+    assert scaled_mm_config is not None
     out_shape = get_out_shape(input_tensor.shape, weight_tensor.shape)

     # Weight tensor preprocessing
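The new assertion fails fast when the weight's layout carries no matmul config, rather than erroring later inside torch._scaled_mm. A minimal sketch of the guard with stand-in classes (only the assert line mirrors the real code; the surrounding names are illustrative):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class FakeLayout:
        # In torchao this would be a Float8MMConfig; a dict stands in here.
        mm_config: Optional[dict] = None

    @dataclass
    class FakeWeight:
        _layout: FakeLayout

    def get_scaled_mm_config(weight_tensor: FakeWeight) -> dict:
        scaled_mm_config = weight_tensor._layout.mm_config
        # Fail fast if no config was attached at quantization time.
        assert scaled_mm_config is not None
        return scaled_mm_config

    print(get_scaled_mm_config(FakeWeight(FakeLayout({"use_fast_accum": True}))))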
461 changes: 276 additions & 185 deletions in main/_modules/torchao/quantization/quant_api.html

Large diffs are not rendered by default.

@@ -6,4 +6,5 @@
 fpx_weight_only
 ===============

-.. autofunction:: fpx_weight_only
+.. autoclass:: fpx_weight_only
+   :members:
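The same autofunction-to-autoclass swap repeats for each API below: after this commit the public names are aliases of config classes rather than functions, so Sphinx's autofunction directive no longer matches them. A quick way to confirm this, assuming torchao from this commit is installed:

    import inspect
    from torchao.quantization import fpx_weight_only

    # fpx_weight_only is now an alias of FPXWeightOnlyConfig, i.e. a class.
    print(inspect.isclass(fpx_weight_only))  # expected: True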
@@ -6,4 +6,5 @@
 gemlite_uintx_weight_only
 =========================

-.. autofunction:: gemlite_uintx_weight_only
+.. autoclass:: gemlite_uintx_weight_only
+   :members:
@@ -6,4 +6,5 @@
 int8_dynamic_activation_int4_weight
 ===================================

-.. autofunction:: int8_dynamic_activation_int4_weight
+.. autoclass:: int8_dynamic_activation_int4_weight
+   :members:
@@ -6,4 +6,5 @@
 int8_dynamic_activation_int8_weight
 ===================================

-.. autofunction:: int8_dynamic_activation_int8_weight
+.. autoclass:: int8_dynamic_activation_int8_weight
+   :members:
@@ -6,4 +6,5 @@
 int8_weight_only
 ================

-.. autofunction:: int8_weight_only
+.. autoclass:: int8_weight_only
+   :members:
@@ -6,4 +6,5 @@
 uintx_weight_only
 =================

-.. autofunction:: uintx_weight_only
+.. autoclass:: uintx_weight_only
+   :members:
10 changes: 5 additions & 5 deletions in main/_sources/tutorials/template_tutorial.rst.txt

@@ -67,11 +67,11 @@ Example code (the output below is generated automatically):

 .. code-block:: none

-   tensor([[0.7982, 0.9110, 0.8970],
-           [0.2665, 0.0490, 0.0855],
-           [0.3657, 0.5905, 0.3759],
-           [0.5376, 0.5672, 0.5224],
-           [0.3938, 0.8779, 0.4979]])
+   tensor([[0.4840, 0.4113, 0.5256],
+           [0.7492, 0.8293, 0.5182],
+           [0.7597, 0.8146, 0.5659],
+           [0.3916, 0.5604, 0.4016],
+           [0.9592, 0.7266, 0.7351]])
12 changes: 6 additions & 6 deletions in main/api_ref_quantization.html

@@ -434,19 +434,19 @@ Quantization APIs for quantize_
   int4_weight_only: alias of Int4WeightOnlyConfig
-  int8_weight_only: Applies int8 weight-only symmetric per-channel quantization to linear layers.
+  int8_weight_only: alias of Int8WeightOnlyConfig
-  int8_dynamic_activation_int4_weight: Applies int8 dynamic per-token asymmetric activation quantization and int4 per-group weight symmetric quantization to linear layers; intended for the ExecuTorch backend, which does not yet support lowering models quantized through this flow.
+  int8_dynamic_activation_int4_weight: alias of Int8DynamicActivationInt4WeightConfig
-  int8_dynamic_activation_int8_weight: Applies int8 dynamic symmetric per-token activation and int8 per-channel weight quantization to linear layers.
+  int8_dynamic_activation_int8_weight: alias of Int8DynamicActivationInt8WeightConfig
-  uintx_weight_only: Applies uintx weight-only asymmetric per-group quantization to linear layers, where x is the number of bits specified by dtype.
+  uintx_weight_only: alias of UIntXWeightOnlyConfig
-  gemlite_uintx_weight_only: Applies weight-only 4- or 8-bit integer quantization using the gemlite Triton kernel and its associated weight-packing format.
+  gemlite_uintx_weight_only: alias of GemliteUIntXWeightOnlyConfig
   intx_quantization_aware_training: alias of IntXQuantizationAwareTrainingConfig

@@ -461,7 +461,7 @@ Quantization APIs for quantize_
   float8_static_activation_float8_weight: alias of Float8StaticActivationFloat8WeightConfig
-  fpx_weight_only: Sub-byte floating point dtypes defined by ebits (exponent bits) and mbits (mantissa bits), e.g. fp6_e3_m2.
+  fpx_weight_only: alias of FPXWeightOnlyConfig
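Because each table entry is now a config class, calling the old function name constructs a config object that quantize_ consumes. A hedged usage sketch, assuming a torchao build with this config-style API (the model shape is illustrative):

    import torch
    from torchao.quantization import quantize_, int8_weight_only

    model = torch.nn.Sequential(torch.nn.Linear(128, 128))
    # int8_weight_only() now builds an Int8WeightOnlyConfig instance.
    quantize_(model, int8_weight_only())
    print(type(model[0].weight))  # a quantized tensor subclass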
12 changes: 3 additions & 9 deletions in main/generated/torchao.quantization.fpx_weight_only.html

@@ -415,16 +415,10 @@
 fpx_weight_only
-torchao.quantization.fpx_weight_only(ebits: int, mbits: int)  [source]
-    Sub-byte floating point dtypes defined by ebits (exponent bits) and mbits
-    (mantissa bits), e.g. fp6_e3_m2, fp6_e2_m3, ...
-    The packing format and kernels are from the fp6-llm paper
-    (https://arxiv.org/abs/2401.14112; repo: https://github.com/usyd-fsalab/fp6_llm,
-    since renamed to quant-llm). For packing details see FpxTensorCoreAQTTensorImpl.
-    This is experimental and will be merged with to_affine_quantized_floatx in the future.
+torchao.quantization.fpx_weight_only
+    alias of FPXWeightOnlyConfig
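The removed docstring still describes the behavior: ebits and mbits select a sub-byte float format such as fp6_e3_m2. A hedged sketch of the config-style call, assuming a CUDA GPU and an fp16 model as the fp6-llm kernels require:

    import torch
    from torchao.quantization import quantize_, fpx_weight_only

    model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).half().cuda()
    # 3 exponent bits + 2 mantissa bits (+ sign) = fp6_e3_m2.
    quantize_(model, fpx_weight_only(ebits=3, mbits=2))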
18 changes: 3 additions & 15 deletions in main/generated/torchao.quantization.gemlite_uintx_weight_only.html

@@ -415,22 +415,10 @@
 gemlite_uintx_weight_only
-torchao.quantization.gemlite_uintx_weight_only(group_size: Optional[int] = 64, bit_width: int = 4, packing_bitwidth: int = 32, contiguous: Optional[bool] = None)  [source]
-    Applies weight-only 4- or 8-bit integer quantization using the gemlite
-    Triton kernel and its associated weight-packing format. This only works
-    for fp16 models; 8-bit quantization is symmetric, 4-bit is asymmetric.
-    Parameters:
-        group_size: controls the granularity of quantization; a smaller size
-            is more fine-grained
-        bit_width: bit width of the quantized weight
-        packing_bitwidth: bit width of the packed weight, 8 or 32; can affect
-            performance depending on hardware
-        contiguous: if set, the weight is packed as specified; leaving it as
-            None lets gemlite determine the best choice
+torchao.quantization.gemlite_uintx_weight_only
+    alias of GemliteUIntXWeightOnlyConfig
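A hedged usage sketch built from the removed signature's defaults (group_size=64, bit_width=4, packing_bitwidth=32); it assumes the gemlite package is installed and, per the old docstring, an fp16 model on a CUDA GPU:

    import torch
    from torchao.quantization import quantize_, gemlite_uintx_weight_only

    model = torch.nn.Sequential(torch.nn.Linear(4096, 4096)).half().cuda()
    # 4-bit quantization is asymmetric, 8-bit symmetric; gemlite's Triton
    # kernel performs the matmul on the packed weights.
    quantize_(model, gemlite_uintx_weight_only(group_size=64, bit_width=4))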
Changes in main/generated/torchao.quantization.int8_dynamic_activation_int4_weight.html

@@ -415,23 +415,10 @@
 int8_dynamic_activation_int4_weight
-torchao.quantization.int8_dynamic_activation_int4_weight(group_size=32, layout=PlainLayout(), mapping_type=MappingType.SYMMETRIC, act_mapping_type=MappingType.ASYMMETRIC)  [source]
-    Applies int8 dynamic per-token asymmetric activation quantization and
-    int4 per-group weight symmetric quantization to linear layers. This is
-    used to produce a model for the ExecuTorch backend, but ExecuTorch does
-    not yet support lowering models quantized through this flow.
-    Parameters:
-        group_size: controls the granularity of quantization; a smaller size
-            is more fine-grained
-        layout: layout type for the quantized weight tensor; only
-            MarlinQQQLayout() and CutlassInt4PackedLayout() are supported for now
-        mapping_type: whether weight quantization is symmetric or asymmetric
-        act_mapping_type: whether activation quantization is symmetric or asymmetric
+torchao.quantization.int8_dynamic_activation_int4_weight
+    alias of Int8DynamicActivationInt4WeightConfig
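A hedged sketch using the removed signature's defaults (group_size=32, symmetric weight mapping, asymmetric activation mapping); note the old docstring's caveat that ExecuTorch cannot yet lower models quantized through this flow:

    import torch
    from torchao.quantization import quantize_, int8_dynamic_activation_int4_weight

    model = torch.nn.Sequential(torch.nn.Linear(256, 256))
    quantize_(model, int8_dynamic_activation_int4_weight(group_size=32))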
(Remaining changed files not rendered on this page.)
