I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

ATen/quantized/QTensorImpl.h
@@ -0,0 +1,125 @@
#pragma once
#include <ATen/quantized/Quantizer.h>
#include <c10/core/TensorImpl.h>
#include <c10/util/Exception.h>
namespace at {
/**
 * QTensorImpl is a TensorImpl for quantized Tensors; it stores a Quantizer,
 * which specifies the quantization scheme and parameters. For more
 * information, see ATen/quantized/Quantizer.h.
 *
 * We'll use "QTensor" in code or documentation to refer to a Tensor with a
 * QTensorImpl.
*/
struct TORCH_API QTensorImpl : public c10::TensorImpl {
public:
QTensorImpl(
Storage&& storage,
DispatchKeySet key_set,
const caffe2::TypeMeta data_type,
QuantizerPtr quantizer);
// See Note [Enum ImplType]
QTensorImpl(
ImplType type,
Storage&& storage,
DispatchKeySet key_set,
const caffe2::TypeMeta data_type,
QuantizerPtr quantizer);
// TODO: Expose in PyTorch Frontend
QuantizerPtr quantizer() {
return quantizer_;
}
void set_quantizer_(QuantizerPtr quantizer) {
quantizer_ = quantizer;
}
/**
* Return a TensorImpl that is a shallow-copy of this TensorImpl.
*
* For usage of `version_counter` and `allow_tensor_metadata_change`,
* see NOTE [ TensorImpl Shallow-Copying ].
*/
c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
const c10::VariableVersion& version_counter,
bool allow_tensor_metadata_change) const override {
auto impl = c10::make_intrusive<QTensorImpl>(
Storage(storage()), key_set(), data_type_, quantizer_);
copy_tensor_metadata(
/*src_impl=*/this,
/*dest_impl=*/impl.get(),
/*version_counter=*/version_counter,
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
impl->refresh_numel();
impl->refresh_contiguous();
return impl;
}
/**
* Return a TensorImpl that is a shallow-copy of this TensorImpl.
*
* For usage of `version_counter` and `allow_tensor_metadata_change`,
* see NOTE [ TensorImpl Shallow-Copying ].
*/
c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
c10::VariableVersion&& version_counter,
bool allow_tensor_metadata_change) const override {
auto impl = c10::make_intrusive<QTensorImpl>(
Storage(storage()), key_set(), data_type_, quantizer_);
copy_tensor_metadata(
/*src_impl=*/this,
/*dest_impl=*/impl.get(),
/*version_counter=*/std::move(version_counter),
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
impl->refresh_numel();
impl->refresh_contiguous();
return impl;
}
/**
* Shallow-copies data from another TensorImpl into this TensorImpl.
*
* For why this function doesn't check this TensorImpl's `allow_tensor_metadata_change_`,
* see NOTE [ TensorImpl Shallow-Copying ].
*/
void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) override {
AT_ASSERT(has_compatible_shallow_copy_type(impl->key_set()));
auto q_impl = static_cast<const QTensorImpl*>(impl.get());
copy_tensor_metadata(
/*src_impl=*/q_impl,
/*dest_impl=*/this,
/*version_counter=*/version_counter(),
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
refresh_numel();
refresh_contiguous();
}
private:
QuantizerPtr quantizer_;
const char* tensorimpl_type_name() const override;
/**
* Copy the tensor metadata fields (e.g. sizes / strides / storage pointer / storage_offset)
* from one TensorImpl to another TensorImpl.
*
* For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE [ TensorImpl Shallow-Copying ].
*/
static void copy_tensor_metadata(
const QTensorImpl* src_q_impl,
QTensorImpl* dest_q_impl,
const c10::VariableVersion& version_counter,
bool allow_tensor_metadata_change) {
TensorImpl::copy_tensor_metadata(src_q_impl, dest_q_impl, version_counter, allow_tensor_metadata_change);
    // QTensorImpl-specific fields.
dest_q_impl->quantizer_ = src_q_impl->quantizer_;
}
};
} // namespace at

ATen/quantized/Quantizer.h
@@ -0,0 +1,279 @@
#pragma once
#include <c10/core/QScheme.h>
#include <c10/core/MemoryFormat.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/intrusive_ptr.h>
#include <c10/core/ScalarType.h>
#include <c10/core/TensorOptions.h>
#include <ATen/Tensor.h>
#include <ATen/TensorUtils.h>
#include <ATen/core/QuantizerBase.h>
#include <cmath>
#include <memory>
#include <utility>
namespace at {
/**
* UnknownQuantizer is a placeholder quantizer for functions that implement
 * quantization in a two-step process. First, a tensor is allocated with an
 * unknown quantizer; then the quantization kernel decides what the final
 * quantizer will be.
*/
struct TORCH_API UnknownQuantizer : public Quantizer {
explicit UnknownQuantizer(ScalarType scalar_type)
: Quantizer(scalar_type) {}
Tensor quantize(const Tensor& tensor) override;
Tensor dequantize(const Tensor& qtensor) override;
Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
QScheme qscheme() const override;
bool equalTo(QuantizerPtr other) const override;
};
/**
* UniformQuantizer is the parent class for all uniform quantizers.
 * These quantization schemes map float values uniformly to
 * quantized values. For example, the affine quantizer is
 * the most commonly used scheme in this category.
*/
struct TORCH_API UniformQuantizer : public Quantizer {
explicit UniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
};
/**
* NonUniformQuantizer is the parent class for all non-uniform quantizers.
 * These quantization schemes may map float values non-uniformly to quantized
 * values. K-means quantization is a representative example in this category.
*/
struct TORCH_API NonUniformQuantizer : public Quantizer {
explicit NonUniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
};
// There is also StochasticQuantizer which is uniform but not affine
/**
 * AffineQuantizer uses an affine transformation to do quantization.
*
* For quantize:
* Y = clamp(round(X / scale + zero_point), min, max)
* For dequantize:
* X = (Y - zero_point) * scale
*/
struct TORCH_API AffineQuantizer : public UniformQuantizer {
explicit AffineQuantizer(ScalarType scalar_type) : UniformQuantizer(scalar_type) {}
};
// Note that we do not have a Symmetric Quantizer in the backend, to reduce
// complications in the quantized kernel implementation.
/**
 * PerTensorAffineQuantizer stores a scale and a zero_point, which are used
 * for all the values in the Tensor.
*/
struct TORCH_API PerTensorAffineQuantizer : public AffineQuantizer {
explicit PerTensorAffineQuantizer(ScalarType scalar_type, double scale, int64_t zero_point)
: AffineQuantizer(scalar_type),
scale_(scale),
zero_point_(zero_point) {}
Tensor quantize(const Tensor& tensor) override;
Tensor dequantize(const Tensor& qtensor) override;
Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
QScheme qscheme() const override {
return kPerTensorAffine;
}
double scale() const {
return scale_;
}
int64_t zero_point() const {
return zero_point_;
}
bool equalTo(QuantizerPtr other) const override {
if (!other.get() || other->qscheme() != kPerTensorAffine) {
return false;
}
auto* other_per_tensor_affine =
static_cast<PerTensorAffineQuantizer*>(other.get());
return scalar_type() == other_per_tensor_affine->scalar_type() &&
scale() == other_per_tensor_affine->scale() &&
zero_point() == other_per_tensor_affine->zero_point();
}
private:
const double scale_;
// We use int64_t for consistency with Python
const int64_t zero_point_;
};
/**
* PerChannelAffineQuantizer is the same as PerTensorAffineQuantizer
* except that we have an independent scale and zero_point parameter
* for each channel.
*
 * Also note that per-channel quantization is mostly applied to the output
 * channels of weights, since per-input-channel quantization of weights, or
 * per-channel quantization of activations, cannot be supported efficiently on
 * most processors: it would require each multiplication result within a
 * single dot product to have a different scale.
*/
struct TORCH_API PerChannelAffineQuantizer : public AffineQuantizer {
explicit PerChannelAffineQuantizer(
ScalarType scalar_type,
Tensor scales,
Tensor zero_points,
int64_t axis)
: AffineQuantizer(scalar_type),
scales_(std::move(scales)),
zero_points_(std::move(zero_points)),
axis_(axis) {}
QScheme qscheme() const override {
return kPerChannelAffine;
}
Tensor scales() const {
return scales_;
}
Tensor zero_points() const {
return zero_points_;
}
int64_t axis() const {
return axis_;
}
Tensor quantize(const Tensor& tensor) override;
Tensor dequantize(const Tensor& qtensor) override;
Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
bool equalTo(QuantizerPtr other) const override {
if (!other.get() || other->qscheme() != kPerChannelAffine) {
return false;
}
auto* other_per_channel_affine =
static_cast<PerChannelAffineQuantizer*>(other.get());
return scalar_type() == other_per_channel_affine->scalar_type() &&
scales().equal(other_per_channel_affine->scales()) &&
zero_points().equal(other_per_channel_affine->zero_points()) &&
axis() == other_per_channel_affine->axis();
}
protected:
Tensor scales_;
Tensor zero_points_;
const int64_t axis_;
};
/**
* PerChannelAffineFloatQParamsQuantizer is the same as PerChannelAffineQuantizer
* except that it expects both scale and zero point to be floating point values.
*
* This quantizer uses the kPerChannelAffineFloatQParams qscheme which is a variant of
* kPerChannelAffine.
*
 * The quantize equation in this case is:
 * Xq = (Xf - zero_point) * inv_scale, where inv_scale = 1.0/scale
 *
 * Note: A floating point zero point is useful in cases where 0 does not need
 * to be exactly representable in the quantized space; using floating point
 * values for the zero point gives additional precision.
*/
struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffineQuantizer {
explicit PerChannelAffineFloatQParamsQuantizer(
ScalarType scalar_type,
Tensor scales,
Tensor zero_points,
int64_t axis)
    : PerChannelAffineQuantizer(scalar_type,
        std::move(scales),
        std::move(zero_points),
        axis) {}
QScheme qscheme() const override {
return kPerChannelAffineFloatQParams;
}
Tensor quantize(const Tensor& tensor) override;
Tensor dequantize(const Tensor& qtensor) override;
Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
bool equalTo(QuantizerPtr other) const override {
if (!other.get() || other->qscheme() != kPerChannelAffineFloatQParams) {
return false;
}
auto* other_per_channel_float_qparams =
static_cast<PerChannelAffineFloatQParamsQuantizer*>(other.get());
return scalar_type() == other_per_channel_float_qparams->scalar_type() &&
scales().equal(other_per_channel_float_qparams->scales()) &&
zero_points().equal(other_per_channel_float_qparams->zero_points()) &&
axis() == other_per_channel_float_qparams->axis();
}
};
// This is an internal utility function for getting at the QTensorImpl.
// You should only use it for writing low-level
// setters/getters for QTensorImpl fields; otherwise, you should use
// the low-level setters/getters that were implemented using it.
// This may be called repeatedly, so make sure it's pretty cheap.
TORCH_API QTensorImpl* get_qtensorimpl(const TensorBase& self);
// double and int64_t are used because of the native function API; these are
// the only argument types we currently have in native functions.
TORCH_API QuantizerPtr
make_per_tensor_affine_quantizer(
double scale, int64_t zero_point, ScalarType scalar_type);
TORCH_API QuantizerPtr make_per_channel_affine_quantizer(
const Tensor& scales,
const Tensor& zero_points,
int64_t axis,
ScalarType scalar_type);
TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type);
// Create a quantized Tensor given the arguments for a normal Tensor and a
// quantizer.
TORCH_API Tensor new_qtensor(
IntArrayRef sizes,
const TensorOptions& options,
QuantizerPtr quantizer);
TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer);
TORCH_API Tensor from_blob_quantized_per_tensor_affine(
void* data,
IntArrayRef sizes,
IntArrayRef strides,
std::function<void(void*)> deleter,
const float scale,
const int64_t zeroPoint,
const TensorOptions& options);
TORCH_API Tensor from_blob_quantized_per_tensor_affine(
void* data,
IntArrayRef sizes,
std::function<void(void*)> deleter,
const float scale,
const int64_t zeroPoint,
const TensorOptions& options);
TORCH_API Tensor from_blob_quantized_per_channel_affine(
void* data,
IntArrayRef sizes,
std::function<void(void*)> deleter,
const Tensor& scales,
const Tensor& zero_points,
const int64_t axis,
const TensorOptions& options);
} // namespace at