I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

@@ -0,0 +1,86 @@
# mypy: allow-untyped-defs
from .fake_quantize import * # noqa: F403
from .fuse_modules import fuse_modules
from .fuser_method_mappings import * # noqa: F403
from .observer import * # noqa: F403
from .qconfig import * # noqa: F403
from .quant_type import * # noqa: F403
from .quantization_mappings import * # noqa: F403
from .quantize import * # noqa: F403
from .quantize_jit import * # noqa: F403
from .stubs import * # noqa: F403
def default_eval_fn(model, calib_data):
r"""
Default evaluation function: takes a torch.utils.data.Dataset or a list of
(input, target) pairs and runs the model on each input.
"""
for data, target in calib_data:
model(data)
__all__ = [
"QuantWrapper",
"QuantStub",
"DeQuantStub",
# Top level API for eager mode quantization
"quantize",
"quantize_dynamic",
"quantize_qat",
"prepare",
"convert",
"prepare_qat",
# Top level API for graph mode quantization on TorchScript
"quantize_jit",
"quantize_dynamic_jit",
"_prepare_ondevice_dynamic_jit",
"_convert_ondevice_dynamic_jit",
"_quantize_ondevice_dynamic_jit",
# Top level API for graph mode quantization on GraphModule(torch.fx)
# 'fuse_fx', 'quantize_fx', # TODO: add quantize_dynamic_fx
# 'prepare_fx', 'prepare_dynamic_fx', 'convert_fx',
"QuantType", # quantization type
# custom module APIs
"get_default_static_quant_module_mappings",
"get_static_quant_module_class",
"get_default_dynamic_quant_module_mappings",
"get_default_qat_module_mappings",
"get_default_qconfig_propagation_list",
"get_default_compare_output_module_list",
"get_quantized_operator",
"get_fuser_method",
# Sub functions for `prepare` and `swap_module`
"propagate_qconfig_",
"add_quant_dequant",
"swap_module",
"default_eval_fn",
# Observers
"ObserverBase",
"WeightObserver",
"HistogramObserver",
"observer",
"default_observer",
"default_weight_observer",
"default_placeholder_observer",
"default_per_channel_weight_observer",
# FakeQuantize (for qat)
"default_fake_quant",
"default_weight_fake_quant",
"default_fixed_qparams_range_neg1to1_fake_quant",
"default_fixed_qparams_range_0to1_fake_quant",
"default_per_channel_weight_fake_quant",
"default_histogram_fake_quant",
# QConfig
"QConfig",
"default_qconfig",
"default_dynamic_qconfig",
"float16_dynamic_qconfig",
"float_qparams_weight_only_qconfig",
# QAT utilities
"default_qat_qconfig",
"prepare_qat",
"quantize_qat",
# module transformations
"fuse_modules",
]
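A minimal usage sketch of the eager-mode API exported above (the toy model, its layer sizes, and the calibration data are made up for illustration; assumes a quantized engine such as fbgemm is available):

import torch
import torch.nn as nn
from torch.ao.quantization import (
    DeQuantStub,
    QuantStub,
    default_eval_fn,
    default_qconfig,
    quantize,
)

class TinyModel(nn.Module):  # hypothetical toy model
    def __init__(self):
        super().__init__()
        self.quant = QuantStub()      # float -> quantized boundary
        self.fc = nn.Linear(4, 2)
        self.dequant = DeQuantStub()  # quantized -> float boundary

    def forward(self, x):
        return self.dequant(self.fc(self.quant(x)))

model = TinyModel().eval()
model.qconfig = default_qconfig                              # observers for activations/weights
calib_data = [(torch.randn(8, 4), None) for _ in range(4)]   # (input, target) pairs
# quantize() runs prepare -> calibration via default_eval_fn -> convert.
qmodel = quantize(model, default_eval_fn, [calib_data])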

@@ -0,0 +1,28 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/ns/_numeric_suite.py` and add a corresponding import statement here.
"""
from torch.ao.ns._numeric_suite import (
_convert_tuple_to_list,
_dequantize_tensor_list,
_find_match,
_get_logger_dict_helper,
_is_identical_module_type,
compare_model_outputs,
compare_model_stub,
compare_weights,
get_logger_dict,
get_matching_activations,
Logger,
NON_LEAF_MODULE_TO_ADD_OBSERVER_ALLOW_LIST,
OutputLogger,
prepare_model_outputs,
prepare_model_with_stubs,
Shadow,
ShadowLogger,
)
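Because each of these legacy modules simply re-exports names from the new torch.ao namespace, the old and new import paths resolve to the very same objects. A small sanity check (a sketch; assumes this shim is installed as the legacy torch.quantization._numeric_suite module):

import torch.ao.ns._numeric_suite as ao_ns
import torch.quantization._numeric_suite as legacy_ns  # assumed legacy path of this shim

# Re-exported names are bound to identical objects, not copies.
assert legacy_ns.Logger is ao_ns.Logger
assert legacy_ns.compare_weights is ao_ns.compare_weights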

@@ -0,0 +1,26 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/ns/_numeric_suite_fx.py` and add a corresponding import statement here.
"""
from torch.ao.ns._numeric_suite_fx import (
_add_loggers_impl,
_add_loggers_one_model,
_add_shadow_loggers_impl,
_extract_logger_info_one_model,
_extract_weights_impl,
_extract_weights_one_model,
add_loggers,
add_shadow_loggers,
extend_logger_results_with_comparison,
extract_logger_info,
extract_shadow_logger_info,
extract_weights,
NSTracer,
OutputLogger,
RNNReturnType,
)

@@ -0,0 +1,133 @@
# mypy: allow-untyped-defs
import torch
# Pack pairs of int4 values into int8, in row-major order; the first int4
# value goes into the lower-order bits and the second int4 value into the
# higher-order bits of the resulting int8 value.
def pack_int4_to_int8(weight):
assert weight.dim() == 2
assert weight.shape[1] % 2 == 0
assert weight.dtype == torch.int8
return ((weight[:, 1::2] & 0xF) << 4) | (weight[:, 0::2] & 0xF)
# Unpack quadruples of bits (nibbles) in int8 values into int4 values, in
# row-major order; the lower 4 bits go into the first int4 value and the
# upper 4 bits into the second int4 value.
def unpack_int8_to_int4(weight):
assert weight.dim() == 2
assert weight.dtype == torch.int8
return torch.stack((weight & 0xF, (weight >> 4) & 0xF), dim=2).view(
weight.shape[0], 2 * weight.shape[1]
)
# Transpose the weight matrix, then reorder its elements according to the
# underlying requirements of the CUTLASS library, so that it can be used
# for the CUTLASS-based mixed-dtypes linear operation.
def quantized_weight_reorder_for_mixed_dtypes_linear_cutlass(
weight, dtypeq, transpose=False
):
assert weight.dim() == 2
assert weight.dtype == torch.int8
assert dtypeq == torch.int8 or dtypeq == torch.quint4x2
assert weight.device.type == "cuda"
device = weight.device
# subbyte_transpose
if not transpose:
if dtypeq == torch.int8:
outp = weight.T
elif dtypeq == torch.quint4x2:
outp = pack_int4_to_int8(unpack_int8_to_int4(weight.view(torch.int8)).T)
else:
outp = weight
ncols, nrows = outp.shape # type: ignore[possibly-undefined]
assert nrows % (32 if dtypeq == torch.quint4x2 else 64) == 0
assert ncols % 64 == 0
# permute_B_rows_for_mixed_gemm
# (permute cols actually, as transpose is applied first here)
if dtypeq == torch.quint4x2:
cols_permuted = (
torch.tensor(
[0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15],
device=device,
)
+ (torch.arange(0, nrows // 16, device=device).reshape(-1, 1) * 16).expand(
nrows // 16, 16
)
).view(-1)
else:
cols_permuted = (
torch.tensor(
[0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15],
device=device,
)
+ (torch.arange(0, nrows // 16, device=device).reshape(-1, 1) * 16).expand(
nrows // 16, 16
)
).view(-1)
outp = outp.index_copy(1, cols_permuted, outp)
# interleave_column_major_tensor
magic0 = 4 if dtypeq == torch.quint4x2 else 2
magic1 = 32 // magic0
tmp0 = (
(torch.arange(0, ncols // magic0, device=device) * (nrows // 4 * magic0))
.view(-1, 1)
.repeat(1, nrows // 4 * magic0)
.view(-1)
)
tmp1 = (
(torch.arange(0, nrows // 4 // magic1, device=device) * (magic0 * magic1))
.view(-1, 1)
.repeat(1, magic1)
.view(-1)
.repeat(ncols)
)
tmp2 = (
(torch.arange(0, magic0, device=device) * magic1)
.view(-1, 1)
.repeat(1, nrows // 4)
.view(-1)
.repeat(ncols // magic0)
)
tmp3 = torch.arange(0, magic1, device=device).repeat(nrows // 4 * ncols // magic1)
outp_offsets = tmp0 + tmp1 + tmp2 + tmp3
tmp = outp.view(-1).view(torch.int32)
outp = torch.zeros_like(tmp)
outp.scatter_(0, outp_offsets, tmp)
outp = outp.view(weight.dtype)
# add_bias_and_interleave_quantized_tensor_inplace
tmp = outp.view(-1)
outp = torch.empty_like(tmp)
if dtypeq == torch.int8:
tmp = (tmp.to(torch.int) + 128).to(tmp.dtype)
outp[0::4] = tmp[0::4]
outp[1::4] = tmp[2::4]
outp[2::4] = tmp[1::4]
outp[3::4] = tmp[3::4]
elif dtypeq == torch.quint4x2:
tmp0 = ((tmp & 0xF) + 8) & 0xF
tmp0 = (tmp0[1::2] << 4) | tmp0[0::2]
tmp1 = (((tmp >> 4) & 0xF) + 8) & 0xF
tmp1 = (tmp1[1::2] << 4) | tmp1[0::2]
outp[0::4] = tmp0[0::2]
outp[1::4] = tmp0[1::2]
outp[2::4] = tmp1[0::2]
outp[3::4] = tmp1[1::2]
if dtypeq == torch.quint4x2:
nrows *= 2
ncols //= 2
return outp.view(nrows, ncols).view(torch.uint8)
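A quick round-trip sketch for the nibble-packing helpers above (made-up values; runs on CPU, since only the CUTLASS reorder itself asserts a CUDA int8 tensor):

import torch

# 4-bit values in [0, 15], one per int8 element, with an even number of columns.
w = torch.randint(0, 16, (2, 8), dtype=torch.int8)
packed = pack_int4_to_int8(w)           # shape (2, 4): two nibbles per output byte
restored = unpack_int8_to_int4(packed)  # back to shape (2, 8)
assert torch.equal(restored, w)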

@@ -0,0 +1,32 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/fake_quantize.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.fake_quantize import (
_is_fake_quant_script_module,
_is_per_channel,
_is_per_tensor,
_is_symmetric_quant,
default_fake_quant,
default_fixed_qparams_range_0to1_fake_quant,
default_fixed_qparams_range_neg1to1_fake_quant,
default_fused_act_fake_quant,
default_fused_per_channel_wt_fake_quant,
default_fused_wt_fake_quant,
default_histogram_fake_quant,
default_per_channel_weight_fake_quant,
default_weight_fake_quant,
disable_fake_quant,
disable_observer,
enable_fake_quant,
enable_observer,
FakeQuantize,
FakeQuantizeBase,
FixedQParamsFakeQuantize,
FusedMovingAvgObsFakeQuantize,
)

@@ -0,0 +1,22 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/fuse_modules.py` and add a corresponding import statement here.
"""
# TODO: These functions are not used outside `fuse_modules.py`.
# Keeping them here for now; they should be removed later.
from torch.ao.quantization.fuse_modules import (
_fuse_modules,
_get_module,
_set_module,
fuse_known_modules,
fuse_modules,
get_fuser_method,
)
# for backward compatibility
from torch.ao.quantization.fuser_method_mappings import fuse_conv_bn, fuse_conv_bn_relu

@@ -0,0 +1,15 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/fuser_method_mappings.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.fuser_method_mappings import (
_DEFAULT_OP_LIST_TO_FUSER_METHOD,
fuse_conv_bn,
fuse_conv_bn_relu,
fuse_linear_bn,
get_fuser_method,
)

@@ -0,0 +1,15 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.convert import convert
from torch.ao.quantization.fx.fuse import fuse
# Omitting files that are unlikely to be used right now, for example
# the newly added lower_to_fbgemm.
from torch.ao.quantization.fx.prepare import prepare

@@ -0,0 +1,38 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx._equalize import (
_convert_equalization_ref,
_InputEqualizationObserver,
_WeightEqualizationObserver,
calculate_equalization_scale,
clear_weight_quant_obs_node,
convert_eq_obs,
CUSTOM_MODULE_SUPP_LIST,
custom_module_supports_equalization,
default_equalization_qconfig,
EqualizationQConfig,
fused_module_supports_equalization,
get_equalization_qconfig_dict,
get_layer_sqnr_dict,
get_op_node_and_weight_eq_obs,
input_equalization_observer,
is_equalization_observer,
maybe_get_next_equalization_scale,
maybe_get_next_input_eq_obs,
maybe_get_weight_eq_obs_node,
nn_module_supports_equalization,
node_supports_equalization,
remove_node,
reshape_scale,
scale_input_observer,
scale_weight_functional,
scale_weight_node,
update_obs_for_equalization,
weight_equalization_observer,
)

@@ -0,0 +1,9 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.convert import convert

@@ -0,0 +1,9 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.fuse import fuse

@@ -0,0 +1,9 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.fuse_handler import DefaultFuseHandler, FuseHandler

@@ -0,0 +1,17 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.graph_module import (
_is_observed_module,
_is_observed_standalone_module,
FusedGraphModule,
GraphModule,
ObservedGraphModule,
ObservedStandaloneGraphModule,
QuantizedGraphModule,
)

@@ -0,0 +1,14 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.match_utils import (
_find_matches,
_is_match,
_MatchResult,
MatchAllNode,
)

@@ -0,0 +1,35 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.pattern_utils import (
_register_fusion_pattern,
_register_quant_pattern,
get_default_fusion_patterns,
get_default_output_activation_post_process_map,
get_default_quant_patterns,
QuantizeHandler,
)
# QuantizeHandler.__module__ = _NAMESPACE
_register_fusion_pattern.__module__ = "torch.ao.quantization.fx.pattern_utils"
get_default_fusion_patterns.__module__ = "torch.ao.quantization.fx.pattern_utils"
_register_quant_pattern.__module__ = "torch.ao.quantization.fx.pattern_utils"
get_default_quant_patterns.__module__ = "torch.ao.quantization.fx.pattern_utils"
get_default_output_activation_post_process_map.__module__ = (
"torch.ao.quantization.fx.pattern_utils"
)
# __all__ = [
# "QuantizeHandler",
# "_register_fusion_pattern",
# "get_default_fusion_patterns",
# "_register_quant_pattern",
# "get_default_quant_patterns",
# "get_default_output_activation_post_process_map",
# ]

@@ -0,0 +1,9 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.prepare import prepare

@@ -0,0 +1,48 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.quantize_handler import (
BatchNormQuantizeHandler,
BinaryOpQuantizeHandler,
CatQuantizeHandler,
ConvReluQuantizeHandler,
CopyNodeQuantizeHandler,
CustomModuleQuantizeHandler,
DefaultNodeQuantizeHandler,
EmbeddingQuantizeHandler,
FixedQParamsOpQuantizeHandler,
GeneralTensorShapeOpQuantizeHandler,
LinearReLUQuantizeHandler,
QuantizeHandler,
RNNDynamicQuantizeHandler,
StandaloneModuleQuantizeHandler,
)
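# Rebind __module__ so these handlers keep reporting the historical
# "quantization_patterns" module path (their pre-migration name) for
# backward compatibility.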
QuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
BinaryOpQuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
CatQuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
ConvReluQuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
LinearReLUQuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
BatchNormQuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
EmbeddingQuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
RNNDynamicQuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
DefaultNodeQuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
FixedQParamsOpQuantizeHandler.__module__ = (
"torch.ao.quantization.fx.quantization_patterns"
)
CopyNodeQuantizeHandler.__module__ = "torch.ao.quantization.fx.quantization_patterns"
CustomModuleQuantizeHandler.__module__ = (
"torch.ao.quantization.fx.quantization_patterns"
)
GeneralTensorShapeOpQuantizeHandler.__module__ = (
"torch.ao.quantization.fx.quantization_patterns"
)
StandaloneModuleQuantizeHandler.__module__ = (
"torch.ao.quantization.fx.quantization_patterns"
)

@@ -0,0 +1,9 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.utils import Pattern, QuantizerCls

@@ -0,0 +1,20 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to the
appropriate files under `torch/ao/quantization/fx/` and add a corresponding
import statement here.
"""
from torch.ao.quantization.fx.utils import (
all_node_args_have_no_tensors,
assert_and_get_unique_device,
create_getattr_from_value,
get_custom_module_class_keys,
get_linear_prepack_op_for_dtype,
get_new_attr_name_with_prefix,
get_non_observable_arg_indexes_and_types,
get_qconv_prepack_op,
graph_module_from_producer_nodes,
maybe_get_next_module,
)

@@ -0,0 +1,36 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/observer.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.observer import (
_is_activation_post_process,
_is_per_channel_script_obs_instance,
_ObserverBase,
_PartialWrapper,
_with_args,
_with_callable_args,
ABC,
default_debug_observer,
default_dynamic_quant_observer,
default_float_qparams_observer,
default_histogram_observer,
default_observer,
default_per_channel_weight_observer,
default_placeholder_observer,
default_weight_observer,
get_observer_state_dict,
HistogramObserver,
load_observer_state_dict,
MinMaxObserver,
MovingAverageMinMaxObserver,
MovingAveragePerChannelMinMaxObserver,
NoopObserver,
ObserverBase,
PerChannelMinMaxObserver,
PlaceholderObserver,
RecordingObserver,
)

@@ -0,0 +1,30 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/qconfig.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.qconfig import (
_add_module_to_qconfig_obs_ctr,
_assert_valid_qconfig,
default_activation_only_qconfig,
default_debug_qconfig,
default_dynamic_qconfig,
default_per_channel_qconfig,
default_qat_qconfig,
default_qat_qconfig_v2,
default_qconfig,
default_weight_only_qconfig,
float16_dynamic_qconfig,
float16_static_qconfig,
float_qparams_weight_only_qconfig,
get_default_qat_qconfig,
get_default_qconfig,
per_channel_dynamic_qconfig,
QConfig,
qconfig_equals,
QConfigAny,
QConfigDynamic,
)

@@ -0,0 +1,10 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/quant_type.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.quant_type import _get_quant_type_to_str, QuantType

@@ -0,0 +1,29 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/quantization_mappings.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.quantization_mappings import (
_get_special_act_post_process,
_has_special_act_post_process,
_INCLUDE_QCONFIG_PROPAGATE_LIST,
DEFAULT_DYNAMIC_QUANT_MODULE_MAPPINGS,
DEFAULT_FLOAT_TO_QUANTIZED_OPERATOR_MAPPINGS,
DEFAULT_MODULE_TO_ACT_POST_PROCESS,
DEFAULT_QAT_MODULE_MAPPINGS,
DEFAULT_REFERENCE_STATIC_QUANT_MODULE_MAPPINGS,
DEFAULT_STATIC_QUANT_MODULE_MAPPINGS,
get_default_compare_output_module_list,
get_default_dynamic_quant_module_mappings,
get_default_float_to_quantized_operator_mappings,
get_default_qat_module_mappings,
get_default_qconfig_propagation_list,
get_default_static_quant_module_mappings,
get_dynamic_quant_module_class,
get_quantized_operator,
get_static_quant_module_class,
no_observer_set,
)

@@ -0,0 +1,30 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/quantize.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.quantize import (
_add_observer_,
_convert,
_get_observer_dict,
_get_unique_devices_,
_is_activation_post_process,
_observer_forward_hook,
_propagate_qconfig_helper,
_register_activation_post_process_hook,
_remove_activation_post_process,
_remove_qconfig,
add_quant_dequant,
convert,
prepare,
prepare_qat,
propagate_qconfig_,
quantize,
quantize_dynamic,
quantize_qat,
swap_module,
)

@@ -0,0 +1,26 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/quantize_fx.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.fx.graph_module import ObservedGraphModule
from torch.ao.quantization.quantize_fx import (
_check_is_graph_module,
_convert_fx,
_convert_standalone_module_fx,
_fuse_fx,
_prepare_fx,
_prepare_standalone_module_fx,
_swap_ff_with_fxff,
convert_fx,
fuse_fx,
prepare_fx,
prepare_qat_fx,
QuantizationTracer,
Scope,
ScopeContextManager,
)

@@ -0,0 +1,26 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/quantize_jit.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.quantize_jit import (
_check_forward_method,
_check_is_script_module,
_convert_jit,
_prepare_jit,
_prepare_ondevice_dynamic_jit,
_quantize_jit,
convert_dynamic_jit,
convert_jit,
fuse_conv_bn_jit,
prepare_dynamic_jit,
prepare_jit,
quantize_dynamic_jit,
quantize_jit,
script_qconfig,
script_qconfig_dict,
)

@@ -0,0 +1,10 @@
# flake8: noqa: F401
r"""
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/stubs.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.stubs import DeQuantStub, QuantStub, QuantWrapper

@@ -0,0 +1,29 @@
# flake8: noqa: F401
r"""
Utils shared by different modes of quantization (eager/graph)
This file is in the process of migration to `torch/ao/quantization`, and
is kept here for compatibility while the migration process is ongoing.
If you are adding a new entry or functionality, please add it to
`torch/ao/quantization/utils.py` and add a corresponding import statement here.
"""
from torch.ao.quantization.utils import (
activation_dtype,
activation_is_int8_quantized,
activation_is_statically_quantized,
calculate_qmin_qmax,
check_min_max_valid,
get_combined_dict,
get_qconfig_dtypes,
get_qparam_dict,
get_quant_type,
get_swapped_custom_module_class,
getattr_from_fqn,
is_per_channel,
is_per_tensor,
weight_dtype,
weight_is_quantized,
weight_is_statically_quantized,
)