I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

ATen/quantized/QTensorImpl.h
@@ -0,0 +1,125 @@
#pragma once
#include <ATen/quantized/Quantizer.h>
#include <c10/core/TensorImpl.h>
#include <c10/util/Exception.h>
namespace at {
/**
 * QTensorImpl is a TensorImpl for quantized Tensors; it stores a Quantizer,
 * which specifies the quantization scheme and parameters. For more
 * information, see ATen/quantized/Quantizer.h.
 *
 * We'll use "QTensor" in code or documentation to refer to a Tensor with a
 * QTensorImpl.
*/
struct TORCH_API QTensorImpl : public c10::TensorImpl {
public:
QTensorImpl(
Storage&& storage,
DispatchKeySet key_set,
const caffe2::TypeMeta data_type,
QuantizerPtr quantizer);
// See Note [Enum ImplType]
QTensorImpl(
ImplType type,
Storage&& storage,
DispatchKeySet key_set,
const caffe2::TypeMeta data_type,
QuantizerPtr quantizer);
// TODO: Expose in PyTorch Frontend
QuantizerPtr quantizer() {
return quantizer_;
}
void set_quantizer_(QuantizerPtr quantizer) {
quantizer_ = quantizer;
}
/**
* Return a TensorImpl that is a shallow-copy of this TensorImpl.
*
* For usage of `version_counter` and `allow_tensor_metadata_change`,
* see NOTE [ TensorImpl Shallow-Copying ].
*/
c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
const c10::VariableVersion& version_counter,
bool allow_tensor_metadata_change) const override {
auto impl = c10::make_intrusive<QTensorImpl>(
Storage(storage()), key_set(), data_type_, quantizer_);
copy_tensor_metadata(
/*src_impl=*/this,
/*dest_impl=*/impl.get(),
/*version_counter=*/version_counter,
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
impl->refresh_numel();
impl->refresh_contiguous();
return impl;
}
/**
* Return a TensorImpl that is a shallow-copy of this TensorImpl.
*
* For usage of `version_counter` and `allow_tensor_metadata_change`,
* see NOTE [ TensorImpl Shallow-Copying ].
*/
c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
c10::VariableVersion&& version_counter,
bool allow_tensor_metadata_change) const override {
auto impl = c10::make_intrusive<QTensorImpl>(
Storage(storage()), key_set(), data_type_, quantizer_);
copy_tensor_metadata(
/*src_impl=*/this,
/*dest_impl=*/impl.get(),
/*version_counter=*/std::move(version_counter),
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
impl->refresh_numel();
impl->refresh_contiguous();
return impl;
}
/**
* Shallow-copies data from another TensorImpl into this TensorImpl.
*
* For why this function doesn't check this TensorImpl's `allow_tensor_metadata_change_`,
* see NOTE [ TensorImpl Shallow-Copying ].
*/
void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) override {
AT_ASSERT(has_compatible_shallow_copy_type(impl->key_set()));
auto q_impl = static_cast<const QTensorImpl*>(impl.get());
copy_tensor_metadata(
/*src_impl=*/q_impl,
/*dest_impl=*/this,
/*version_counter=*/version_counter(),
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
refresh_numel();
refresh_contiguous();
}
private:
QuantizerPtr quantizer_;
const char* tensorimpl_type_name() const override;
/**
* Copy the tensor metadata fields (e.g. sizes / strides / storage pointer / storage_offset)
* from one TensorImpl to another TensorImpl.
*
* For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE [ TensorImpl Shallow-Copying ].
*/
static void copy_tensor_metadata(
const QTensorImpl* src_q_impl,
QTensorImpl* dest_q_impl,
const c10::VariableVersion& version_counter,
bool allow_tensor_metadata_change) {
TensorImpl::copy_tensor_metadata(src_q_impl, dest_q_impl, version_counter, allow_tensor_metadata_change);
    // QTensorImpl-specific fields.
dest_q_impl->quantizer_ = src_q_impl->quantizer_;
}
};
} // namespace at

ATen/quantized/Quantizer.h
@@ -0,0 +1,279 @@
#pragma once
#include <c10/core/QScheme.h>
#include <c10/core/MemoryFormat.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/intrusive_ptr.h>
#include <c10/core/ScalarType.h>
#include <c10/core/TensorOptions.h>
#include <ATen/Tensor.h>
#include <ATen/TensorUtils.h>
#include <ATen/core/QuantizerBase.h>
#include <cmath>
#include <memory>
#include <utility>
namespace at {
/**
* UnknownQuantizer is a placeholder quantizer for functions that implement
 * quantization in a two-step process. First, a tensor is allocated with an
 * unknown quantizer; then the quantization kernel decides what the final
 * quantizer will be.
*/
struct TORCH_API UnknownQuantizer : public Quantizer {
explicit UnknownQuantizer(ScalarType scalar_type)
: Quantizer(scalar_type) {}
Tensor quantize(const Tensor& tensor) override;
Tensor dequantize(const Tensor& qtensor) override;
Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
QScheme qscheme() const override;
bool equalTo(QuantizerPtr other) const override;
};
/**
* UniformQuantizer is the parent class for all uniform quantizers.
 * These quantization schemes map float values uniformly to
 * quantized values. For example, the affine quantizer is
 * the most commonly used scheme in this category.
*/
struct TORCH_API UniformQuantizer : public Quantizer {
explicit UniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
};
/**
* NonUniformQuantizer is the parent class for all non-uniform quantizers.
 * These quantization schemes may map float values non-uniformly to quantized
 * values. K-means quantization is a representative example in this category.
*/
struct TORCH_API NonUniformQuantizer : public Quantizer {
explicit NonUniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
};
// There is also StochasticQuantizer which is uniform but not affine
/**
 * AffineQuantizer uses an affine transformation to do quantization.
*
* For quantize:
* Y = clamp(round(X / scale + zero_point), min, max)
* For dequantize:
* X = (Y - zero_point) * scale
*/
struct TORCH_API AffineQuantizer : public UniformQuantizer {
explicit AffineQuantizer(ScalarType scalar_type) : UniformQuantizer(scalar_type) {}
};
// Note that we do not have a Symmetric Quantizer in the backend, to reduce
// complications in the quantized kernel implementation.
/**
 * PerTensorAffineQuantizer stores a scale and a zero_point, which are used
 * for all the values in the Tensor.
*/
struct TORCH_API PerTensorAffineQuantizer : public AffineQuantizer {
explicit PerTensorAffineQuantizer(ScalarType scalar_type, double scale, int64_t zero_point)
: AffineQuantizer(scalar_type),
scale_(scale),
zero_point_(zero_point) {}
Tensor quantize(const Tensor& tensor) override;
Tensor dequantize(const Tensor& qtensor) override;
Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
QScheme qscheme() const override {
return kPerTensorAffine;
}
double scale() const {
return scale_;
}
int64_t zero_point() const {
return zero_point_;
}
bool equalTo(QuantizerPtr other) const override {
if (!other.get() || other->qscheme() != kPerTensorAffine) {
return false;
}
auto* other_per_tensor_affine =
static_cast<PerTensorAffineQuantizer*>(other.get());
return scalar_type() == other_per_tensor_affine->scalar_type() &&
scale() == other_per_tensor_affine->scale() &&
zero_point() == other_per_tensor_affine->zero_point();
}
private:
const double scale_;
// We use int64_t for consistency with Python
const int64_t zero_point_;
};
/**
* PerChannelAffineQuantizer is the same as PerTensorAffineQuantizer
* except that we have an independent scale and zero_point parameter
* for each channel.
*
 * Also note that per-channel quantization is mostly applied to the output
 * channels of weights, since per-input-channel quantization of weights, or
 * per-channel quantization of activations, cannot be supported efficiently on
 * most processors: it would require each multiplication result within a
 * single dot product to have a different scale.
*/
struct TORCH_API PerChannelAffineQuantizer : public AffineQuantizer {
explicit PerChannelAffineQuantizer(
ScalarType scalar_type,
Tensor scales,
Tensor zero_points,
int64_t axis)
: AffineQuantizer(scalar_type),
scales_(std::move(scales)),
zero_points_(std::move(zero_points)),
axis_(axis) {}
QScheme qscheme() const override {
return kPerChannelAffine;
}
Tensor scales() const {
return scales_;
}
Tensor zero_points() const {
return zero_points_;
}
int64_t axis() const {
return axis_;
}
Tensor quantize(const Tensor& tensor) override;
Tensor dequantize(const Tensor& qtensor) override;
Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
bool equalTo(QuantizerPtr other) const override {
if (!other.get() || other->qscheme() != kPerChannelAffine) {
return false;
}
auto* other_per_channel_affine =
static_cast<PerChannelAffineQuantizer*>(other.get());
return scalar_type() == other_per_channel_affine->scalar_type() &&
scales().equal(other_per_channel_affine->scales()) &&
zero_points().equal(other_per_channel_affine->zero_points()) &&
axis() == other_per_channel_affine->axis();
}
protected:
Tensor scales_;
Tensor zero_points_;
const int64_t axis_;
};
/**
* PerChannelAffineFloatQParamsQuantizer is the same as PerChannelAffineQuantizer
* except that it expects both scale and zero point to be floating point values.
*
* This quantizer uses the kPerChannelAffineFloatQParams qscheme which is a variant of
* kPerChannelAffine.
*
 * The quantize equation in this case is:
 * Xq = (Xf - zero_point) * inv_scale, where inv_scale = 1.0/scale
 *
 * Note: A floating point zero point is useful in cases where 0 does not need
 * to be exactly representable in the quantized space; using floating point
 * values for the zero point gives additional precision.
*/
struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffineQuantizer {
explicit PerChannelAffineFloatQParamsQuantizer(
ScalarType scalar_type,
Tensor scales,
Tensor zero_points,
int64_t axis)
    : PerChannelAffineQuantizer(scalar_type,
        std::move(scales),
        std::move(zero_points),
        axis) {}
QScheme qscheme() const override {
return kPerChannelAffineFloatQParams;
}
Tensor quantize(const Tensor& tensor) override;
Tensor dequantize(const Tensor& qtensor) override;
Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
bool equalTo(QuantizerPtr other) const override {
if (!other.get() || other->qscheme() != kPerChannelAffineFloatQParams) {
return false;
}
auto* other_per_channel_float_qparams =
static_cast<PerChannelAffineFloatQParamsQuantizer*>(other.get());
return scalar_type() == other_per_channel_float_qparams->scalar_type() &&
scales().equal(other_per_channel_float_qparams->scales()) &&
zero_points().equal(other_per_channel_float_qparams->zero_points()) &&
axis() == other_per_channel_float_qparams->axis();
}
};
// This is an internal utility function for getting at the QTensorImpl.
// You should only use it for writing low-level
// setters/getters for QTensorImpl fields; otherwise, you should use
// the low-level setters/getters that were implemented using it.
// This may be called repeatedly, so make sure it's pretty cheap.
TORCH_API QTensorImpl* get_qtensorimpl(const TensorBase& self);
// double and int64_t are used because of the native function API; these are
// the only argument types we currently have in native functions.
TORCH_API QuantizerPtr
make_per_tensor_affine_quantizer(
double scale, int64_t zero_point, ScalarType scalar_type);
TORCH_API QuantizerPtr make_per_channel_affine_quantizer(
const Tensor& scales,
const Tensor& zero_points,
int64_t axis,
ScalarType scalar_type);
TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type);
// Create a quantized Tensor given the arguments for a normal Tensor and a
// quantizer.
TORCH_API Tensor new_qtensor(
IntArrayRef sizes,
const TensorOptions& options,
QuantizerPtr quantizer);
TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer);
TORCH_API Tensor from_blob_quantized_per_tensor_affine(
void* data,
IntArrayRef sizes,
IntArrayRef strides,
std::function<void(void*)> deleter,
const float scale,
const int64_t zeroPoint,
const TensorOptions& options);
TORCH_API Tensor from_blob_quantized_per_tensor_affine(
void* data,
IntArrayRef sizes,
std::function<void(void*)> deleter,
const float scale,
const int64_t zeroPoint,
const TensorOptions& options);
TORCH_API Tensor from_blob_quantized_per_channel_affine(
void* data,
IntArrayRef sizes,
std::function<void(void*)> deleter,
const Tensor& scales,
const Tensor& zero_points,
const int64_t axis,
const TensorOptions& options);
} // namespace at