75 lines
2.6 KiB
Python
75 lines
2.6 KiB
Python
import onnx
|
|
import onnx.helper
|
|
|
|
from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain
|
|
from .base_operator import QuantOperatorBase
|
|
|
|
|
|
class QLinearSoftmax(QuantOperatorBase):
|
|
def quantize(self):
|
|
node = self.node
|
|
# set limitations for softmax output scale and zp, because the output of softmax is always 0-1
|
|
if self.quantizer.activation_qType == onnx.onnx_pb.TensorProto.UINT8:
|
|
out_scale = 1 / 256.0
|
|
out_zero_point = 0
|
|
else:
|
|
out_scale = 1 / 256.0
|
|
out_zero_point = -128
|
|
# only try to quantize when given quantization parameters for it
|
|
(
|
|
data_found,
|
|
output_scale_name,
|
|
output_zp_name,
|
|
_,
|
|
_,
|
|
) = self.quantizer._get_quantization_params(node.output[0], out_scale, out_zero_point)
|
|
|
|
# get quantized input tensor names, quantize input if needed
|
|
(
|
|
quantized_input_names,
|
|
input_zero_point_names,
|
|
input_scale_names,
|
|
nodes,
|
|
) = self.quantizer.quantize_activation(node, [0])
|
|
|
|
if not data_found or quantized_input_names is None:
|
|
return super().quantize()
|
|
|
|
# Create an entry for output quantized value.
|
|
qlinear_output_name = node.output[0] + TENSOR_NAME_QUANT_SUFFIX
|
|
quantized_output_value = QuantizedValue(
|
|
node.output[0],
|
|
qlinear_output_name,
|
|
output_scale_name,
|
|
output_zp_name,
|
|
QuantizedValueType.Input,
|
|
)
|
|
self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value
|
|
|
|
# Create qlinear softmax node for given type
|
|
kwargs = {}
|
|
for attribute in node.attribute:
|
|
kwargs.update(attribute_to_kwarg(attribute))
|
|
kwargs["domain"] = ms_domain
|
|
# make qlinearsoft has the real opset_version, its default SinceVersion would be 1
|
|
kwargs["opset"] = self.quantizer.opset_version
|
|
qlinear_node_name = node.name + "_quant" if node.name else ""
|
|
qnode = onnx.helper.make_node(
|
|
"QLinear" + node.op_type,
|
|
[
|
|
quantized_input_names[0],
|
|
input_scale_names[0],
|
|
input_zero_point_names[0],
|
|
output_scale_name,
|
|
output_zp_name,
|
|
],
|
|
[qlinear_output_name],
|
|
qlinear_node_name,
|
|
**kwargs,
|
|
)
|
|
|
|
# add all newly created nodes
|
|
nodes.append(qnode)
|
|
self.quantizer.new_nodes += nodes
|
|
return None
|