# Reinforced-Learning-Godot/rl/Lib/site-packages/onnxruntime/quantization/operators/activation.py
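"""Quantization handlers for activation operators.

QLinearActivation rewrites an activation into its QLinear* contrib op
(e.g. "QLinearSigmoid") in the com.microsoft domain, and drops Relu/Clip
nodes whose effect is already captured by the output quantization
parameters. QDQRemovableActivation removes activations that become
no-ops in QDQ mode, reusing the upstream tensor's quantization.
"""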
import onnx
from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain
from .base_operator import QuantOperatorBase
from .qdq_base_operator import QDQOperatorBase


class QLinearActivation(QuantOperatorBase):
    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def QuantizeClipRelu(self):  # noqa: N802
        node = self.node
        assert node.op_type == "Relu" or node.op_type == "Clip"

        # When the mode is QLinearOps, the output quantization params are calculated from the
        # outputs of activation nodes, so these nodes can be removed from the graph if they
        # follow a quantized op.
        # If the input to this node is not quantized, keep the node as-is.
        # If the activation is symmetric, do not quantize the op; fall back to the default path.
        if node.input[0] not in self.quantizer.quantized_value_map or self.quantizer.is_activation_symmetric:
            return super().quantize()

        # The Relu/Clip is a no-op in the quantized domain: alias its output to the
        # input's quantized value.
        quantized_value = self.quantizer.quantized_value_map[node.input[0]]
        self.quantizer.quantized_value_map[node.output[0]] = quantized_value
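
    # A minimal sketch of the aliasing above (tensor names are illustrative):
    # if "conv_out" was quantized and a Relu producing "relu_out" follows it, then after
    # QuantizeClipRelu, quantized_value_map["relu_out"] is the same QuantizedValue as
    # quantized_value_map["conv_out"], so consumers of "relu_out" read the already-quantized
    # tensor directly and no Relu node is emitted into the quantized graph.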

    def quantize(self):
        node = self.node
        if node.op_type == "Relu" or node.op_type == "Clip":
            self.QuantizeClipRelu()
            return

        nnapi_sigmoid_option = "extra.Sigmoid.nnapi"
        sigmoid_nnapi_mode = (
            node.op_type == "Sigmoid"
            and nnapi_sigmoid_option in self.quantizer.extra_options
            and self.quantizer.extra_options[nnapi_sigmoid_option]
        )
        # NNAPI requires sigmoid outputs to use scale 1/256 and zero point 0,
        # mapping the output range [0, 1) onto the 256 uint8 levels.
        use_scale = 1 / 256.0 if sigmoid_nnapi_mode else None
        use_zeropoint = 0 if sigmoid_nnapi_mode else None
        # No assert on op_type, since it is controlled by the registry;
        # only try to quantize when quantization parameters are available for this output.
        (
            data_found,
            output_scale_name,
            output_zp_name,
            _,
            _,
        ) = self.quantizer._get_quantization_params(node.output[0], use_scale, use_zeropoint)
        (
            quantized_input_names,
            zero_point_names,
            scale_names,
            nodes,
        ) = self.quantizer.quantize_activation(node, [0])
        if not data_found or quantized_input_names is None:
            # Missing output params or an unquantizable input: fall back to the default path.
            return super().quantize()

        qlinear_activation_output = node.output[0] + TENSOR_NAME_QUANT_SUFFIX
        qlinear_activation_name = ""
        if node.name:
            qlinear_activation_name = node.name + "_quant"
        kwargs = {}
        for attribute in node.attribute:
            kwargs.update(attribute_to_kwarg(attribute))
        kwargs["domain"] = ms_domain

        # Inputs follow the QLinear* contrib-op signature:
        # X, X_scale, X_zero_point, Y_scale, Y_zero_point.
        qlinear_activation_inputs = [
            quantized_input_names[0],
            scale_names[0],
            zero_point_names[0],
            output_scale_name,
            output_zp_name,
        ]

        qlinear_activation_node = onnx.helper.make_node(
            "QLinear" + node.op_type,
            qlinear_activation_inputs,
            [qlinear_activation_output],
            qlinear_activation_name,
            **kwargs,
        )
        # Create an entry for this quantized value
        q_output = QuantizedValue(
            node.output[0],
            qlinear_activation_output,
            output_scale_name,
            output_zp_name,
            QuantizedValueType.Input,
        )
        self.quantizer.quantized_value_map[node.output[0]] = q_output

        nodes.append(qlinear_activation_node)
        self.quantizer.new_nodes += nodes
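
# Illustration (tensor names are hypothetical): a float subgraph
#     sigmoid_out = Sigmoid(conv_out)
# quantized with extra_options={"extra.Sigmoid.nnapi": True} becomes a single
# com.microsoft node
#     QLinearSigmoid(conv_out_q, conv_out_scale, conv_out_zp,
#                    sigmoid_out_scale, sigmoid_out_zp) -> sigmoid_out + TENSOR_NAME_QUANT_SUFFIX
# with sigmoid_out_scale fixed to 1/256 and sigmoid_out_zp fixed to 0.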


class QDQRemovableActivation(QDQOperatorBase):
    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def quantize(self):
        node = self.node

        # If the input to this node is not quantized, keep the node as-is.
        if not self.quantizer.is_tensor_quantized(node.input[0]):
            return

        if (
            not self.quantizer.is_activation_symmetric
            and not self.quantizer.qdq_keep_removable_activations
            and self.quantizer.try_replacing_upstream_output(node.input[0], node.output[0])
        ):
            # The upstream producer now writes directly to this node's output,
            # so the activation itself can be dropped.
            self.quantizer.remove_node(self.node)
        else:
            # Otherwise quantize the activation's tensors in place.
            self.quantizer.quantize_activation_tensor(node.input[0])
            if not self.disable_qdq_for_node_output:
                self.quantizer.quantize_activation_tensor(node.output[0])
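
# How these classes are used, as a minimal sketch: the registry module and the
# exact mappings below are assumptions based on how the quantizer dispatches
# handlers, not something defined in this file.
#
#     QLinearOpsRegistry = {"Relu": QLinearActivation, "Sigmoid": QLinearActivation, ...}
#     QDQRegistry = {"Relu": QDQRemovableActivation, "Clip": QDQRemovableActivation, ...}
#
# The quantizer looks up node.op_type in the active registry, instantiates the
# handler with (onnx_quantizer, onnx_node), and calls quantize() on it.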