// Copyright (c) ONNX Project Contributors

/*
 * SPDX-License-Identifier: Apache-2.0
 */

#include "onnx/defs/controlflow/utils.h"
#include "onnx/defs/schema.h"

namespace ONNX_NAMESPACE {

using SupportType = OpSchema::SupportType;

static std::vector<std::string> control_flow_types_ir9() {
  auto t = OpSchema::all_tensor_types_ir9();
  auto s = OpSchema::all_tensor_sequence_types_ir9();
  auto o = OpSchema::all_optional_types_ir9();
  t.insert(t.end(), s.begin(), s.end());
  t.insert(t.end(), o.begin(), o.end());
  return t;
}

static std::vector<std::string> control_flow_types_ir4() {
  auto t = OpSchema::all_tensor_types_ir4();
  auto s = OpSchema::all_tensor_sequence_types_ir4();
  auto o = OpSchema::all_optional_types_ir4();
  t.insert(t.end(), s.begin(), s.end());
  t.insert(t.end(), o.begin(), o.end());
  return t;
}

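// Illustrative note (an assumption for documentation purposes, not an exhaustive list): the
// lists built by the helpers above combine plain tensor types with their sequence and
// optional wrappers, so the "V" type constraints below admit entries such as
//   "tensor(float)", "tensor(int64)",
//   "seq(tensor(float))",
//   "optional(tensor(float))", "optional(seq(tensor(float)))".
// The authoritative set comes from the OpSchema helper functions, not from this comment.
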
ONNX_OPERATOR_SET_SCHEMA(
    If,
    19,
    OpSchema()
        .SetDoc("If conditional")
        .Input(0, "cond", "Condition for the if. The tensor must contain a single element.", "B")
        .Output(
            0,
            "outputs",
            "Values that are live-out to the enclosing scope. The return values in "
            "the `then_branch` and `else_branch` must be of the same data type. "
            "The `then_branch` and `else_branch` may produce tensors with the same "
            "element type and different shapes. "
            "If corresponding outputs from the then-branch and the else-branch have "
            "static shapes S1 and S2, then the shape of the corresponding output "
            "variable of the if-node (if present) must be compatible with both S1 "
            "and S2 as it represents the union of both possible shapes. "
            "For example, if in a model file, the first "
            "output of `then_branch` is typed float tensor with shape [2] and the "
            "first output of `else_branch` is another float tensor with shape [3], "
            "If's first output should have (a) no shape set, or (b) "
            "a shape of rank 1 with neither `dim_value` nor `dim_param` set, or (c) "
            "a shape of rank 1 with a unique `dim_param`. "
            "In contrast, the first output cannot have the shape [2] since [2] and "
            "[3] are not compatible.",
            "V",
            OpSchema::Variadic,
            false)
        .Attr(
            "then_branch",
            "Graph to run if condition is true. Has N outputs: values you wish to "
            "be live-out to the enclosing scope. The number of outputs must match"
            " the number of outputs in the else_branch.",
            AttributeProto::GRAPH)
        .Attr(
            "else_branch",
            "Graph to run if condition is false. Has N outputs: values you wish to"
            " be live-out to the enclosing scope. The number of outputs must match"
            " the number of outputs in the then_branch.",
            AttributeProto::GRAPH)
        .TypeConstraint(
            "V",
            control_flow_types_ir9(),
            "All Tensor, Sequence(Tensor), Optional(Tensor), and Optional(Sequence(Tensor)) types up to IRv9.")
        .TypeConstraint("B", {"tensor(bool)"}, "Only bool")
        .TypeAndShapeInferenceFunction(IfInferenceFunction));

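// Illustrative sketch of the shape rule described in the "outputs" documentation above
// (graph names and shapes are made up): an If node whose branches return float tensors of
// shapes [2] and [3] respectively, e.g.
//   %y = If[then_branch = <graph returning float[2]>,
//           else_branch = <graph returning float[3]>](%cond)
// may declare %y with no shape, or with rank 1 and a symbolic dim_param, but not with the
// concrete shape [2] or [3].
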
ONNX_OPERATOR_SET_SCHEMA(
    If,
    16,
    OpSchema()
        .SetDoc("If conditional")
        .Input(0, "cond", "Condition for the if. The tensor must contain a single element.", "B")
        .Output(
            0,
            "outputs",
            "Values that are live-out to the enclosing scope. The return values in "
            "the `then_branch` and `else_branch` must be of the same data type. "
            "The `then_branch` and `else_branch` may produce tensors with the same "
            "element type and different shapes. "
            "If corresponding outputs from the then-branch and the else-branch have "
            "static shapes S1 and S2, then the shape of the corresponding output "
            "variable of the if-node (if present) must be compatible with both S1 "
            "and S2 as it represents the union of both possible shapes. "
            "For example, if in a model file, the first "
            "output of `then_branch` is typed float tensor with shape [2] and the "
            "first output of `else_branch` is another float tensor with shape [3], "
            "If's first output should have (a) no shape set, or (b) "
            "a shape of rank 1 with neither `dim_value` nor `dim_param` set, or (c) "
            "a shape of rank 1 with a unique `dim_param`. "
            "In contrast, the first output cannot have the shape [2] since [2] and "
            "[3] are not compatible.",
            "V",
            OpSchema::Variadic,
            false)
        .Attr(
            "then_branch",
            "Graph to run if condition is true. Has N outputs: values you wish to "
            "be live-out to the enclosing scope. The number of outputs must match"
            " the number of outputs in the else_branch.",
            AttributeProto::GRAPH)
        .Attr(
            "else_branch",
            "Graph to run if condition is false. Has N outputs: values you wish to"
            " be live-out to the enclosing scope. The number of outputs must match"
            " the number of outputs in the then_branch.",
            AttributeProto::GRAPH)
        .TypeConstraint(
            "V",
            control_flow_types_ir4(),
            "All Tensor, Sequence(Tensor), Optional(Tensor), and Optional(Sequence(Tensor)) types up to IRv4.")
        .TypeConstraint("B", {"tensor(bool)"}, "Only bool")
        .TypeAndShapeInferenceFunction(IfInferenceFunction));

static const char* Loop_ver16_doc = R"DOC(
|
|
Generic Looping construct. This loop has multiple termination conditions:
|
|
|
|
1) Trip count. Iteration count specified at runtime. Set by
|
|
specifying the input M. Optional. Set to empty string to omit.
|
|
Note that a static trip count (specified at graph construction time) can be
|
|
specified by passing in a constant node for input M.
|
|
2) Loop termination condition. This is an input to the op that determines
|
|
whether to run the first iteration and also a loop-carried dependency for
|
|
the body graph. The body graph must yield a value for the condition variable,
|
|
whether this input is provided or not.
|
|
|
|
This table summarizes the operating modes of this operator with equivalent
|
|
C-style code:
|
|
|
|
Operator inputs defined as (max_trip_count, condition_var).
|
|
|
|
* input ("", ""):
|
|
for (int i=0; ; ++i) {
|
|
cond = ... // Note this value is ignored, but is required in the body
|
|
}
|
|
|
|
* input ("", cond) // Note this is analogous to a while loop
|
|
bool cond = ...;
|
|
for (int i=0; cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
* input ("", 1) // Note this is analogous to a do-while loop
|
|
bool cond = true
|
|
for (int i=0; cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
* input (trip_count, "") // Note this is analogous to a for loop
|
|
int trip_count = ...
|
|
for (int i=0; i < trip_count; ++i) {
|
|
cond = ...; // ignored
|
|
}
|
|
|
|
* input (trip_count, cond)
|
|
int trip_count = ...;
|
|
bool cond = ...;
|
|
for (int i=0; i < trip_count && cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
|
|
*Sample usage - cond as well as trip count*
|
|
|
|
graph predict-net {
|
|
%a = Constant[value = <Scalar Tensor [3]>]()
|
|
%b = Constant[value = <Scalar Tensor [6]>]()
|
|
%keepgoing = Constant[value = <Scalar Tensor [1]>]()
|
|
%max_trip_count = Constant[value = <Scalar Tensor [10]>]()
|
|
%keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)
|
|
return
|
|
}
|
|
|
|
graph body-net (
|
|
%i[INT32, scalar] // iteration number
|
|
%keepgoing_in[BOOL, scalar] // incoming loop-termination-condition; not used
|
|
%b_in[INT32, scalar] // incoming value of loop-carried-dependency b
|
|
) {
|
|
%my_local = Add(%a, %b_in)
|
|
%b_out = Sub(%a, %b_in) // outgoing value of loop-carried-dependency b
|
|
%keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-termination-condition
|
|
%user_defined_val = Add(%b_in, %b_in) // scan-output value to be accumulated
|
|
return %keepgoing_out, %b_out, %user_defined_val
|
|
}
|
|
|
|
*Sample equivalent C code*
|
|
|
|
{
|
|
/* User-defined code (enclosing scope) */
|
|
int a = 3, b = 6;
|
|
bool keepgoing = true; // Analogous to input cond
|
|
/* End user-defined code */
|
|
|
|
/* Implicitly-defined code */
|
|
const int max_trip_count = 10; // Analogous to input M
|
|
int user_defined_vals[]; // Imagine this is resizable
|
|
/* End implicitly-defined code */
|
|
/* initialize loop-carried variables and scan-output variables */
|
|
bool keepgoing_out = keepgoing
|
|
int b_out = b
|
|
|
|
for (int i=0; i < max_trip_count && keepgoing_out; ++i) {
|
|
/* Implicitly-defined code: bind actual parameter values
|
|
to formal parameter variables of loop-body */
|
|
bool keepgoing_in = keepgoing_out;
|
|
bool b_in = b_out;
|
|
|
|
/* User-defined code (loop body) */
|
|
int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine
|
|
b_out = a - b_in;
|
|
keepgoing_out = my_local > b_out;
|
|
user_defined_val = b_in + b_in; // b_in and b_out are different variables
|
|
/* End user-defined code */
|
|
|
|
/* Implicitly defined-code */
|
|
user_defined_vals[i] = user_defined_val // accumulate scan-output values
|
|
}
|
|
// int t = my_local; // Can't do this. my_local is not accessible here.
|
|
|
|
// The values below are bound to the output variables of the loop and therefore accessible
|
|
// b_out; user_defined_vals; keepgoing_out;
|
|
}
|
|
|
|
There are several things of note in this code snippet:
|
|
|
|
1) Values from the enclosing scope (i.e. variable "a" here) are in scope and can
|
|
be referenced in the inputs of the loop.
|
|
2) Any values computed in the loop body that needs to be used in a subsequent
|
|
iteration or after the loop are modelled using a pair of variables in the loop-body,
|
|
consisting of an input variable (eg., b_in) and an output variable (eg., b_out).
|
|
These are referred to as loop-carried dependences. The loop operation node
|
|
supplies the input value of the input variable for the first iteration, and
|
|
returns the output value of the output variable produced by the final
|
|
iteration.
|
|
3) Scan_output variables are used to implicitly concatenate values computed across
|
|
all the iterations. In the above example, the value of user_defined_val computed
|
|
over all iterations are concatenated and returned as the value of user_defined_vals
|
|
after the loop.
|
|
4) Values created in the body cannot be accessed in the enclosing scope,
|
|
except using the mechanism described above.
|
|
|
|
Note that the semantics of this op support "diagonal" or "wavefront" execution.
|
|
(See Step 3 here for an example:
|
|
https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/).
|
|
Frontends should emit multi-layer RNNs as a series of While operators (with
|
|
time being the inner looping dimension), with each successive layer consuming
|
|
the scan_outputs from the previous layer, possibly going through several
|
|
point-wise operators (e.g. dropout, residual connections, linear layer).
|
|
|
|
The input/output of subgraph (produced by loop node) matching is based on order instead of name. The implementation will figure out the names based on this order.
|
|
)DOC";
|
|
|
|
ONNX_OPERATOR_SET_SCHEMA(
    Loop,
    19,
    OpSchema()
        .SetDoc(Loop_ver16_doc)
        .Input(
            0,
            "M",
            "A maximum trip-count for the loop specified at runtime. Optional."
            " Pass empty string to skip.",
            "I",
            OpSchema::Optional)
        .Input(
            1,
            "cond",
            "A boolean termination condition. Optional. Pass empty string to skip.",
            "B",
            OpSchema::Optional)
        .Input(
            2,
            "v_initial",
            "The initial values of any loop-carried dependencies (values that "
            "change across loop iterations)",
            "V",
            OpSchema::Variadic,
            false,
            0)
        .Output(
            0,
            "v_final_and_scan_outputs",
            "Final N loop carried dependency values then K scan_outputs. "
            "Scan outputs must be Tensors.",
            "V",
            OpSchema::Variadic,
            false)
        .Attr(
            "body",
            "The graph run each iteration. It has 2+N inputs: (iteration_num, "
            "condition, loop carried dependencies...). It has 1+N+K outputs: "
            "(condition, loop carried dependencies..., scan_outputs...). Each "
            "scan_output is created by concatenating the value of the specified "
            "output value at the end of each iteration of the loop. It is an error"
            " if the dimensions or data type of these scan_outputs change across loop"
            " iterations.",
            AttributeProto::GRAPH)
        .TypeConstraint(
            "V",
            control_flow_types_ir9(),
            "All Tensor, Sequence(Tensor), Optional(Tensor), and Optional(Sequence(Tensor)) types up to IRv9.")
        .TypeConstraint("I", {"tensor(int64)"}, "tensor of int64, which should be a scalar.")
        .TypeConstraint("B", {"tensor(bool)"}, "tensor of bool, which should be a scalar.")
        .TypeAndShapeInferenceFunction(LoopInferenceFunction));

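// Worked example of the input/output arithmetic in the "body" documentation above (counts
// only; the names are illustrative): a Loop with N = 2 loop-carried values and K = 1 scan
// output has
//   Loop inputs : M, cond, v1_initial, v2_initial        (2 + N)
//   body inputs : iteration_num, cond_in, v1_in, v2_in   (2 + N)
//   body outputs: cond_out, v1_out, v2_out, scan_elt     (1 + N + K)
//   Loop outputs: v1_final, v2_final, scan_out           (N + K)
// where scan_out stacks the per-iteration scan_elt values along a new leading axis.
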
ONNX_OPERATOR_SET_SCHEMA(
    Loop,
    16,
    OpSchema()
        .SetDoc(Loop_ver16_doc)
        .Input(
            0,
            "M",
            "A maximum trip-count for the loop specified at runtime. Optional."
            " Pass empty string to skip.",
            "I",
            OpSchema::Optional)
        .Input(
            1,
            "cond",
            "A boolean termination condition. Optional. Pass empty string to skip.",
            "B",
            OpSchema::Optional)
        .Input(
            2,
            "v_initial",
            "The initial values of any loop-carried dependencies (values that "
            "change across loop iterations)",
            "V",
            OpSchema::Variadic,
            false,
            0)
        .Output(
            0,
            "v_final_and_scan_outputs",
            "Final N loop carried dependency values then K scan_outputs. "
            "Scan outputs must be Tensors.",
            "V",
            OpSchema::Variadic,
            false)
        .Attr(
            "body",
            "The graph run each iteration. It has 2+N inputs: (iteration_num, "
            "condition, loop carried dependencies...). It has 1+N+K outputs: "
            "(condition, loop carried dependencies..., scan_outputs...). Each "
            "scan_output is created by concatenating the value of the specified "
            "output value at the end of each iteration of the loop. It is an error"
            " if the dimensions or data type of these scan_outputs change across loop"
            " iterations.",
            AttributeProto::GRAPH)
        .TypeConstraint(
            "V",
            control_flow_types_ir4(),
            "All Tensor, Sequence(Tensor), Optional(Tensor), and Optional(Sequence(Tensor)) types up to IRv4.")
        .TypeConstraint("I", {"tensor(int64)"}, "tensor of int64, which should be a scalar.")
        .TypeConstraint("B", {"tensor(bool)"}, "tensor of bool, which should be a scalar.")
        .TypeAndShapeInferenceFunction(LoopInferenceFunction));

static const char* scan_16_doc = R"DOC(
|
|
Scan can be used to iterate over one or more scan_input tensors,
|
|
constructing zero or more scan_output tensors. It combines ideas from general recurrences,
|
|
functional programming constructs such as scan, fold, map, and zip, and is intended to enable
|
|
generalizations of RNN-like constructs for sequence-to-sequence processing.
|
|
Other tensors (referred to as state_variables here) can be used to carry a state
|
|
when iterating from one element to another (similar to hidden-state in RNNs, also referred
|
|
to as loop-carried dependences in the context of loops).
|
|
Many common usages involve a single scan_input tensor (where functionality
|
|
similar to scan, fold and map can be obtained). When more than one scan_input is used,
|
|
a behavior similar to zip is obtained.
|
|
|
|
The attribute body must be a graph, specifying the computation to be performed in
|
|
every iteration. It takes as input the current values of the state_variables and
|
|
the current iterated element of the scan_inputs. It must return the (updated) values
|
|
of the state_variables and zero or more scan_output_element tensors. The values of the
|
|
scan_output_element tensors are concatenated over all the iterations to produce the
|
|
scan_output values of the scan construct (similar to the concatenated intermediate
|
|
hidden-state values of RNN-like constructs). All the output tensors (state_variables as
|
|
well as scan_output_element tensors) are required to have the same shape in each iteration
|
|
of the loop (a restriction imposed to enable efficient memory allocation).
|
|
|
|
Note that the iterated element passed to the body subgraph does not have a sequence
|
|
axis. It will have a rank one less than the rank of the corresponding scan_input.
|
|
|
|
The scan operation returns the final values of the state_variables as well as the
|
|
scan_outputs.
|
|
|
|
The optional attribute scan_input_directions specifies the direction (forward or backward)
|
|
for each scan input. If this attribute is omitted, all sequences are scanned in the forward
|
|
direction. A bidirectional scan may be performed by specifying the same tensor input twice
|
|
in the scan_inputs, once with a forward direction, and once with a backward direction.
|
|
|
|
The scan_output of the operation is produced by concatenating the scan_output_element
|
|
values produced by the body in each iteration. The optional attribute scan_output_directions
|
|
specifies the direction in which scan_output is constructed (by appending or prepending the
|
|
scan_output_element to scan_output in each iteration) for each scan_output. If this attribute
|
|
is omitted, the scan_output_element is appended to the scan_output in each iteration.
|
|
|
|
The optional attribute scan_input_axes specifies the axis to be scanned for each scan_input.
|
|
If omitted, every scan_input will be scanned in axis 0. For example, if axis 0 is the
|
|
batch axis and axis 1 is the time axis (to be scanned), specify an axis value of 1.
|
|
Note that scanning a non-zero axis may be less efficient than scanning axis zero.
|
|
|
|
The optional attribute scan_output_axes specifies the axis along which the scan_outputs
|
|
are accumulated for each scan_output. For example, if axis 1 is the time axis (to be
|
|
scanned) for both inputs and outputs, specify a scan_input axis and scan_output axis
|
|
value of 1.
|
|
|
|
Note that because of the ONNX restriction that only the last parameter of an operator can
|
|
be variadic, the initial-states and scan-inputs are listed together as one input parameter.
|
|
Similarly, the final-states and scan-outputs are listed together as one output parameter.
|
|
The attribute num_scan_inputs indicates the number M of scan-inputs.
|
|
|
|
The behavior of
|
|
|
|
Scan <
|
|
num_scan_inputs = m,
|
|
body = loop-body,
|
|
scan_input_axes = [axis_1, ..., axis_m]
|
|
> (init_1, ..., init_n, scan_1, ..., scan_m)
|
|
|
|
is equivalent to the following pseudo-code:
|
|
|
|
// scan_i.shape[axis_i] denotes the (max) sequence-length of scan_i
|
|
// scan_i.shape[axis_i] is required to be equal to scan_j.shape[axis_j] for all i,j.
|
|
sequence_length = scan_1.shape[axis_1];
|
|
|
|
// initialize state-variables
|
|
st_1 = init_1; ... st_n = init_n;
|
|
// initialize scan-output variables: [] denotes an empty tensor
|
|
scan_out_1 = []; ...; scan_out_k = [];
|
|
// identify number of iterations:
|
|
|
|
// execute loop
|
|
for (int t = 0; t < sequence_length; ++t) {
|
|
// generate the scan-input elements: the notation T<axis=k>[t] indicates the sub-tensor
|
|
// of rank one less than T obtained by indexing T at position t along axis k.
|
|
si_1 = scan_1<axis=axis_1>[t];
|
|
... ;
|
|
si_m = scan_m<axis=axis_m>[t];
|
|
// execute loop-body
|
|
st_1, ..., st_n, so_1, ..., so_k = loop-body(st_1, ..., st_n, si_1, ..., si_m)
|
|
// accumulate the scan-output elements
|
|
scan_out_1 = Concat<axis=0>(scan_out_1, so_1); ... ; scan_out_k = Concat<axis=0>(scan_out_k, so_k);
|
|
}
|
|
|
|
return st_1, ..., st_n, scan_out_1, ..., scan_out_k;
|
|
|
|
*Sample usage: Encoding RNN using a Scan*
|
|
|
|
The following example shows how a simple RNN over an input tensor %X, with weight tensor %Wi,
|
|
recurrence weight tensor %Ri, bias tensors %Wbi and %Rbi, and initial hidden-state %H_0 can
|
|
be encoded as a ScanLoop. Note that the loop-body is a nested graph, and it directly computes
|
|
%Wi, %Ri, %Wbi, and %Rbi (typically constants or initializers in the body graph). If these
|
|
values are computed in the outer graph, they need to be passed in as extra state_variables.
|
|
|
|
graph rnn-encoding {
|
|
%H_0 = ...
|
|
%X = ...
|
|
%Y_h, %Y = Scan[body = <graph rnn-cell-1>, num_scan_inputs=1](%H_0, %X)
|
|
return %Y, %Y_h
|
|
}
|
|
|
|
graph rnn-cell-1 (
|
|
%H_tminus1[FLOAT, tensor]
|
|
%X_t[FLOAT, tensor]
|
|
) {
|
|
%Wi = ...
|
|
%Ri = ...
|
|
%Wbi = ...
|
|
%Rbi = ...
|
|
%t1 = X_t * (Wi^T)
|
|
%t2 = H_tminus1*(Ri^T)
|
|
%t3 = Add(%t1, %t2)
|
|
%t4 = Add(%t3, %Wbi)
|
|
%t5 = Add(%t4, %Rbi)
|
|
%Ht = Tanh(%t5)
|
|
%Accumulate = Identity(%Ht)
|
|
return %Ht, %Accumulate
|
|
}
|
|
|
|
)DOC";
|
|
|
|
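// Illustrative shape example for the axis attributes described in scan_16_doc (the shapes
// are made up for documentation purposes): with a single scan input of shape [batch, time, d],
// scan_input_axes = [1] and scan_output_axes = [1], the body receives per-iteration elements
// of shape [batch, d], and a body scan_output_element of shape [batch, e] is accumulated into
// a scan output of shape [batch, time, e].
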
ONNX_OPERATOR_SET_SCHEMA(
    Scan,
    19,
    OpSchema()
        .SetDoc(scan_16_doc)
        .Input(
            0,
            "initial_state_and_scan_inputs",
            "Initial values of the loop's N state variables followed by M scan_inputs",
            "V",
            OpSchema::Variadic,
            false)
        .Output(
            0,
            "final_state_and_scan_outputs",
            "Final values of the loop's N state variables followed by K scan_outputs",
            "V",
            OpSchema::Variadic,
            false)
        .Attr(
            "body",
            "The graph run each iteration. It has N+M inputs: "
            "(loop state variables..., scan_input_elts...). It has N+K outputs: "
            "(loop state variables..., scan_output_elts...). Each "
            "scan_output is created by concatenating the value of the specified "
            "scan_output_elt value at the end of each iteration of the loop. It is an error"
            " if the dimensions of these values change across loop iterations.",
            AttributeProto::GRAPH,
            true)
        .Attr("num_scan_inputs", "An attribute specifying the number of scan_inputs M. ", AttributeProto::INT, true)
        .Attr(
            "scan_input_directions",
            "An optional list of M flags. The i-th element of the list specifies the direction "
            "to be scanned for the i-th scan_input tensor: 0 indicates forward direction and 1 "
            "indicates reverse direction. "
            "If omitted, all scan_input tensors will be scanned in the forward direction.",
            AttributeProto::INTS,
            false)
        .Attr(
            "scan_output_directions",
            "An optional list of K flags, one for each scan_output. The i-th element of the list "
            "specifies whether the i-th scan_output should be constructed by appending or "
            "prepending a new value in each iteration: 0 indicates appending and 1 "
            "indicates prepending. "
            "If omitted, all scan_output tensors will be produced by appending a value "
            "in each iteration.",
            AttributeProto::INTS,
            false)
        .Attr(
            "scan_input_axes",
            "An optional list of M axes. The i-th element of the list specifies the axis "
            "to be scanned (the sequence axis) for the i-th scan_input. If omitted, 0 will "
            "be used as the scan axis for every scan_input. Negative value for an axis means "
            "counting dimensions from the back. Accepted range is [-r, r-1] where r = rank(input).",
            AttributeProto::INTS,
            false)
        .Attr(
            "scan_output_axes",
            "An optional list of K axes. The i-th element of the list specifies the axis "
            "for the i-th scan_output. The scan outputs are accumulated along the specified "
            "axis. If omitted, 0 will be used as the scan axis for every scan_output. "
            "Negative value for an axis means counting dimensions from the back. Accepted "
            "range is [-r, r-1].",
            AttributeProto::INTS,
            false)
        .TypeConstraint("V", OpSchema::all_tensor_types_ir9(), "All Tensor types up to IRv9.")
        .TypeAndShapeInferenceFunction(ScanInferenceFunction)); // Shares same shape inference as opset 11

ONNX_OPERATOR_SET_SCHEMA(
    Scan,
    16,
    OpSchema()
        .SetDoc(scan_16_doc)
        .Input(
            0,
            "initial_state_and_scan_inputs",
            "Initial values of the loop's N state variables followed by M scan_inputs",
            "V",
            OpSchema::Variadic,
            false)
        .Output(
            0,
            "final_state_and_scan_outputs",
            "Final values of the loop's N state variables followed by K scan_outputs",
            "V",
            OpSchema::Variadic,
            false)
        .Attr(
            "body",
            "The graph run each iteration. It has N+M inputs: "
            "(loop state variables..., scan_input_elts...). It has N+K outputs: "
            "(loop state variables..., scan_output_elts...). Each "
            "scan_output is created by concatenating the value of the specified "
            "scan_output_elt value at the end of each iteration of the loop. It is an error"
            " if the dimensions of these values change across loop iterations.",
            AttributeProto::GRAPH,
            true)
        .Attr("num_scan_inputs", "An attribute specifying the number of scan_inputs M. ", AttributeProto::INT, true)
        .Attr(
            "scan_input_directions",
            "An optional list of M flags. The i-th element of the list specifies the direction "
            "to be scanned for the i-th scan_input tensor: 0 indicates forward direction and 1 "
            "indicates reverse direction. "
            "If omitted, all scan_input tensors will be scanned in the forward direction.",
            AttributeProto::INTS,
            false)
        .Attr(
            "scan_output_directions",
            "An optional list of K flags, one for each scan_output. The i-th element of the list "
            "specifies whether the i-th scan_output should be constructed by appending or "
            "prepending a new value in each iteration: 0 indicates appending and 1 "
            "indicates prepending. "
            "If omitted, all scan_output tensors will be produced by appending a value "
            "in each iteration.",
            AttributeProto::INTS,
            false)
        .Attr(
            "scan_input_axes",
            "An optional list of M axes. The i-th element of the list specifies the axis "
            "to be scanned (the sequence axis) for the i-th scan_input. If omitted, 0 will "
            "be used as the scan axis for every scan_input. Negative value for an axis means "
            "counting dimensions from the back. Accepted range is [-r, r-1] where r = rank(input).",
            AttributeProto::INTS,
            false)
        .Attr(
            "scan_output_axes",
            "An optional list of K axes. The i-th element of the list specifies the axis "
            "for the i-th scan_output. The scan outputs are accumulated along the specified "
            "axis. If omitted, 0 will be used as the scan axis for every scan_output. "
            "Negative value for an axis means counting dimensions from the back. Accepted "
            "range is [-r, r-1].",
            AttributeProto::INTS,
            false)
        .TypeConstraint("V", OpSchema::all_tensor_types_ir4(), "All Tensor types up to IRv4.")
        .TypeAndShapeInferenceFunction(ScanInferenceFunction)); // Shares same shape inference as opset 11

void ScanInferenceFunctionOpset8(InferenceContext& ctx) {
  // NOTE:
  // The first input to Scan is sequence_lens. We skip that when processing
  // inputs in many places below, so the - 1 in multiple places is due to that.
  auto num_inputs = ctx.getNumInputs();
  auto num_scan_inputs = narrow_cast<size_t>(ctx.getAttribute("num_scan_inputs")->i());
  auto num_loop_state_vars = num_inputs - 1 - num_scan_inputs;

  std::vector<TypeProto> temporary_type_protos;
  temporary_type_protos.reserve(num_inputs);

  std::vector<const TypeProto*> subgraph_input_types;

  TensorShapeProto_Dimension batch_size_dim;
  TensorShapeProto_Dimension sequence_len_dim;

  for (size_t i = 1; i < num_inputs; ++i) {
    bool is_loop_state_var = (i - 1) < num_loop_state_vars;
    bool has_shape = hasInputShape(ctx, i);
    const auto* input_type = ctx.getInputType(i);

    // Enforce type constraint for inputs
    if (!input_type || !input_type->has_tensor_type()) {
      fail_type_inference("Scan input ", i, " was not a tensor.");
    }

    if (is_loop_state_var) {
      // If it's a loop state variable we can propagate type and shape 1:1 to
      // the matching Scan output.
      // We can also pass through the type and shape to the subgraph but need to
      // remove the batch size dimension from the shape.
      propagateElemTypeFromInputToOutput(ctx, i, i - 1);

      if (has_shape) {
        propagateShapeFromInputToOutput(ctx, i, i - 1);

        // remove batch size dimension and add to subgraph_input_types
        temporary_type_protos.push_back(RemoveDimensionsFromShape(*input_type, 1));
        subgraph_input_types.push_back(&temporary_type_protos.back());
      } else {
        subgraph_input_types.push_back(input_type);
      }
    } else {
      // For other inputs there are no fixed relationships to the Scan outputs,
      // so we don't propagate type/shape information.
      // We can pass through the type and shape to the subgraph inputs but need
      // to remove the batch size and sequence length dimensions from the shape.
      if (has_shape) {
        // remove batch size and sequence length dimensions and add to
        // subgraph_input_types
        temporary_type_protos.push_back(RemoveDimensionsFromShape(*input_type, 2));
        subgraph_input_types.push_back(&temporary_type_protos.back());

        // update batch_size and sequence_len if a value is available
        const auto& shape = input_type->tensor_type().shape();
        if (shape.dim_size() > 2) {
          const auto& dims = shape.dim();
          mergeInDimensionInfo(dims.Get(0), batch_size_dim, 0);
          mergeInDimensionInfo(dims.Get(1), sequence_len_dim, 1);
        }
      } else {
        subgraph_input_types.push_back(input_type);
      }
    }
  }

  // Run inferencing on the subgraph
  std::vector<const TypeProto*> output_types;

  GraphInferencer* graphInferencer = ctx.getGraphAttributeInferencer("body");
  if (graphInferencer) {
    std::vector<const TensorProto*> input_data;
    for (size_t i = 1; i < num_inputs; ++i) {
      input_data.push_back(ctx.getInputData(i));
    }

    output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
  }

  // if empty(), assume inferencing was skipped
  if (!output_types.empty()) {
    auto num_outputs = ctx.getNumOutputs();
    if (output_types.size() != num_outputs) {
      fail_type_inference(
          "Graph attribute inferencing returned type information for ",
          output_types.size(),
          " outputs. Expected ",
          num_outputs);
    }

    // propagate type/shape information for loop state variables and outputs
    for (size_t i = 0; i < num_outputs; ++i) {
      const bool is_loop_state_var = i < num_loop_state_vars;
      auto* subgraph_output_type = output_types[i];
      auto* scan_output_type = ctx.getOutputType(i);

      if (!subgraph_output_type->has_tensor_type()) {
        fail_type_inference("Scan 'body' subgraph outputs should all be tensors but output ", i, " was not");
      }

      // propagate output type. loop state vars were done in the above code.
      if (!is_loop_state_var) {
        scan_output_type->mutable_tensor_type()->set_elem_type(subgraph_output_type->tensor_type().elem_type());
      }

      // propagate shape
      if (subgraph_output_type->tensor_type().has_shape()) {
        // we need to add in the batch size and sequence length values if
        // available before merging with any existing info. Create a copy of the
        // inferred type info from the subgraph to do that.
        TypeProto inferred_type(*subgraph_output_type);
        auto* mutable_inferred_tensor_type = inferred_type.mutable_tensor_type();
        auto* mutable_inferred_shape = mutable_inferred_tensor_type->mutable_shape();

        mutable_inferred_shape->clear_dim();
        *mutable_inferred_shape->add_dim() = batch_size_dim;

        if (!is_loop_state_var) {
          *mutable_inferred_shape->add_dim() = sequence_len_dim;
        }

        for (const auto& dim : subgraph_output_type->tensor_type().shape().dim()) {
          (*mutable_inferred_shape->add_dim()) = dim;
        }

        auto* mutable_scan_output_tensor_type = scan_output_type->mutable_tensor_type();

        mergeInShapeInfo(*mutable_inferred_tensor_type, *mutable_scan_output_tensor_type);
      }
    }
  }
}

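// Worked example for the opset-8 shape handling above (shapes are illustrative only):
// with batch size B and max sequence length T,
//   - a loop state variable of shape [B, d] is seen by the body as [d], and the matching
//     Scan output keeps the shape [B, d];
//   - a scan input of shape [B, T, d] is seen by the body as [d];
//   - a body scan_output_elt of shape [e] yields a Scan output of shape [B, T, e].
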
int handle_negative_axis_validate_opset9(const std::string& attrib, int axis, int rank) {
  if (!(-rank <= axis && axis < rank)) {
    fail_shape_inference(attrib, " axis value ", axis, " is invalid for a tensor of rank ", rank);
  }
  return (axis >= 0 ? axis : axis + rank);
}

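// Example (values are illustrative): for a rank-3 tensor, an axis of -1 normalizes to 2,
// while an axis of 3 fails shape inference because the accepted range is [-3, 2].
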
void ScanInferenceFunctionOpset9(InferenceContext& ctx) {
  auto num_inputs = ctx.getNumInputs();
  auto num_scan_inputs = narrow_cast<size_t>(ctx.getAttribute("num_scan_inputs")->i());
  auto num_loop_state_vars = num_inputs - num_scan_inputs;
  auto num_outputs = ctx.getNumOutputs();
  auto num_scan_outputs = num_outputs - num_loop_state_vars;

  std::vector<int64_t> axes, output_axes;
  if (getRepeatedAttribute(ctx, "scan_input_axes", axes)) {
    if (axes.size() != num_scan_inputs) {
      fail_shape_inference(
          "Number of scan input axes specified (",
          axes.size(),
          ") is not equal to number of scan inputs (",
          num_scan_inputs,
          ").");
    }
  } else {
    axes.insert(axes.end(), num_scan_inputs, 0);
  }

  if (getRepeatedAttribute(ctx, "scan_output_axes", output_axes)) {
    if (output_axes.size() != num_scan_outputs) {
      fail_shape_inference(
          "Number of scan output axes specified (",
          output_axes.size(),
          ") is not equal to number of scan outputs (",
          num_scan_outputs,
          ").");
    }
  } else {
    output_axes.insert(output_axes.end(), num_scan_outputs, 0);
  }

  std::vector<TypeProto> temporary_type_protos;
  temporary_type_protos.reserve(num_inputs);

  std::vector<const TypeProto*> subgraph_input_types;

  TensorShapeProto_Dimension sequence_len_dim;

  for (size_t i = 0; i < num_inputs; ++i) {
    bool is_loop_state_var = i < num_loop_state_vars;
    bool has_shape = hasInputShape(ctx, i);
    const auto* input_type = ctx.getInputType(i);

    // Enforce type constraint for inputs
    if (!input_type || !input_type->has_tensor_type()) {
      fail_type_inference("Scan input ", i, " was not a tensor.");
    }

    if (is_loop_state_var) {
      // If it's a loop state variable we can propagate type and shape 1:1 to
      // the matching Scan output.
      // We can also pass through the type and shape to the subgraph unchanged.
      propagateElemTypeFromInputToOutput(ctx, i, i);
      if (has_shape)
        propagateShapeFromInputToOutput(ctx, i, i);

      subgraph_input_types.push_back(input_type);
    } else {
      // For other inputs there are no fixed relationships to the Scan outputs,
      // so we don't propagate type/shape information.
      // We can pass through the type and shape to the subgraph inputs but
      // need to remove the sequence length dimensions from the shape.
      if (has_shape) {
        const auto& shape = input_type->tensor_type().shape();

        // remove sequence length dimensions and add to subgraph_input_types
        int axis = static_cast<int>(axes[i - num_loop_state_vars]);
        axis = handle_negative_axis_validate_opset9("scan_input_axes", axis, shape.dim_size());

        // update sequence_len if a value is available

        const auto& dims = shape.dim();
        mergeInDimensionInfo(dims.Get(axis), sequence_len_dim, 1);

        temporary_type_protos.push_back(RemoveIthDimensionFromShape(*input_type, axis));
        subgraph_input_types.push_back(&temporary_type_protos.back());

      } else {
        subgraph_input_types.push_back(input_type);
      }
    }
  }

  // Run inferencing on the subgraph
  std::vector<const TypeProto*> output_types;

  GraphInferencer* graphInferencer = ctx.getGraphAttributeInferencer("body");
  if (graphInferencer) {
    std::vector<const TensorProto*> input_data;
    for (size_t i = 0; i < num_inputs; ++i) {
      // ctx.getInputData(i), the input to scan, does not represent the input to
      // scan body. So, we pass in null, to represent an unknown value.
      input_data.push_back(nullptr);
    }

    output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
  }

  // if empty(), assume inferencing was skipped
  if (!output_types.empty()) {
    if (output_types.size() != num_outputs) {
      fail_type_inference(
          "Graph attribute inferencing returned type information for ",
          output_types.size(),
          " outputs. Expected ",
          num_outputs);
    }

    // propagate type/shape information for loop state variables and outputs
    for (size_t i = 0; i < num_outputs; ++i) {
      const bool is_loop_state_var = i < num_loop_state_vars;
      auto* subgraph_output_type = output_types[i];
      auto* scan_output_type = ctx.getOutputType(i);
      auto* mutable_scan_output_tensor_type = scan_output_type->mutable_tensor_type();

      if (!subgraph_output_type->has_tensor_type()) {
        fail_type_inference("Scan 'body' subgraph outputs should all be tensors but output ", i, " was not");
      }
      auto& subgraph_output_tensor_type = subgraph_output_type->tensor_type();

      if (is_loop_state_var) {
        // merge shape; type already propagated
        mergeInShapeInfo(subgraph_output_tensor_type, *mutable_scan_output_tensor_type);
      } else {
        scan_output_type->mutable_tensor_type()->set_elem_type(subgraph_output_tensor_type.elem_type());

        // propagate shape
        if (subgraph_output_tensor_type.has_shape()) {
          // infer shape of scan-output from the shape of scan-output-element
          // by adding sequence-length at the correct axis position
          const TensorShapeProto& subgraph_output_shape = subgraph_output_tensor_type.shape();
          TensorShapeProto inferred_shape;

          auto subgraph_output_rank = subgraph_output_shape.dim_size();
          auto output_rank = subgraph_output_rank + 1;
          int output_axis = static_cast<int>(output_axes[i - num_loop_state_vars]);
          output_axis = handle_negative_axis_validate_opset9("scan_output_axes", output_axis, output_rank);

          for (int j = 0; j < output_axis; ++j)
            *(inferred_shape.add_dim()) = subgraph_output_shape.dim(j);
          *(inferred_shape.add_dim()) = sequence_len_dim;
          for (int j = output_axis; j < subgraph_output_rank; ++j)
            *(inferred_shape.add_dim()) = subgraph_output_shape.dim(j);

          // Merge inferred shape with existing shape information
          mergeInShapeInfo(inferred_shape, *mutable_scan_output_tensor_type);
        }
      }
    }
  }
}

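// Worked example for the inference logic above (shapes are illustrative only): with the
// default scan_input_axes/scan_output_axes of 0, a scan input of shape [T, d] is presented
// to the body as [d], sequence_len_dim is taken from dimension 0 (T), and a body
// scan_output_elt of shape [e] is inferred as a scan output of shape [T, e].
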
static const char* scan_opset8_doc = R"DOC(
|
|
Scan can be used to iterate over one or more scan_input tensors,
|
|
constructing zero or more scan_output tensors. It combines ideas from general recurrences,
|
|
functional programming constructs such as scan, fold, map, and zip, and is intended to enable
|
|
generalizations of RNN-like constructs for sequence-to-sequence processing.
|
|
Other tensors (referred to as state_variables here) can be used to carry a state
|
|
when iterating from one element to another (similar to hidden-state in RNNs, also referred
|
|
to as loop-carried dependences in the context of loops). All these tensors are required to
|
|
have the same shape in each iteration of the loop (a restriction imposed to enable efficient
|
|
memory allocation). Many common usages involve a single scan_input tensor (where functionality
|
|
similar to scan, fold and map can be obtained). When more than one scan_input is used,
|
|
a behavior similar to zip is obtained.
|
|
|
|
The attribute body must be a graph, specifying the computation to be performed in
|
|
every iteration. It takes as input the current values of the state_variables and
|
|
the current iterated element of the scan_inputs. It must return the (updated) values
|
|
of the state_variables and zero or more scan_output_element tensors. The values of the
|
|
scan_output_element tensors are concatenated over all the iterations to produce the
|
|
scan_output values of the scan construct (similar to the concatenated intermediate
|
|
hidden-state values of RNN-like constructs).
|
|
|
|
The scan operation returns the final values of the state_variables as well as the
|
|
scan_outputs.
|
|
|
|
The operation supports batching, and the batch-axis is required to be 0.
|
|
When multiple scan_input tensors are used, they must all have the same batch-size,
|
|
and they must all have the same maximum-sequence-length (the dimensionality of the
|
|
sequence axis or scan axis). The sequence axis or scan axis is required to be 1.
|
|
|
|
The operation has an optional sequence_lens input (of shape [BATCH_SIZE]) to
|
|
allow variable length sequences of length <= the maximum-sequence-length. If this
|
|
input is not specified, all sequences are assumed to be of length equal to
|
|
maximum-sequence-length. For variable length input sequences, the scan_outputs
|
|
will consist of a sequence of same length as the input, padded to the
|
|
maximum-sequence-length.
|
|
|
|
The optional attribute directions can be used to scan a sequence in the reverse direction.
|
|
If this attribute is omitted, all sequences are scanned in the forward direction.
|
|
A bidirectional scan be performed by specifying the same tensor input twice in the
|
|
scan_inputs, once with a forward direction, and once with a backward direction.
|
|
|
|
Note that because of the ONNX restriction that only the last parameter of an operator can
|
|
be variadic, the initial-states and scan-inputs are listed together as one input parameter.
|
|
Similarly, the final-states and scan-outputs are listed together as one output parameter.
|
|
The attribute num_scan_inputs indicates the number M of scan-inputs.
|
|
|
|
The behavior of
|
|
|
|
Scan <
|
|
num_scan_inputs = m,
|
|
body = loop-body
|
|
> (sequence_lengths, init_1, ..., init_n, scan_1, ..., scan_m)
|
|
|
|
is equivalent to the following pseudo-code:
|
|
|
|
// T.shape[0] denotes the batch-size of T
|
|
// The batch-size of scan_1, ..., scan_m are all required to be equal
|
|
batch_size = scan_1.shape[0];
|
|
|
|
// scan_i.shape[1] denotes the (max) sequence-length of scan_i
|
|
// scan_i.shape[1] is required to be equal to scan_j.shape[1] for all i,j.
|
|
max_sequence_length = scan_1.shape[1];
|
|
|
|
for (int batch = 0; batch < batch_size; ++batch) {
|
|
// initialize state-variables
|
|
st_1 = init_1; ... st_n = init_n;
|
|
// initialize scan-output variables: [] denotes an empty tensor
|
|
scan_out_1 = []; ...; scan_out_k = [];
|
|
// identify number of iterations:
|
|
N = (sequence_lengths specified) ? sequence_lengths[batch] : max_sequence_length;
|
|
|
|
// execute loop
|
|
for (int t = 0; t < N; ++t) {
|
|
// generate the scan-input elements: the notation T<axis=k>[t] indicates the sub-tensor
|
|
// of rank one less than T obtained by indexing T at position t along axis k.
|
|
si_1 = (scan_1<axis=0>[batch])<axis=1>[t];
|
|
... ;
|
|
si_m = (scan_m<axis=0>[batch])<axis=1>[t];
|
|
// execute loop-body
|
|
st_1, ..., st_n, so_1, ..., so_k = loop-body(st_1, ..., st_n, si_1, ..., si_m)
|
|
// accumulate the scan-output elements
|
|
scan_out_1 = Concat<axis=0>(scan_out_1, so_1); ... ; scan_out_k = Concat<axis=0>(scan_out_k, so_k);
|
|
}
|
|
// accumulate the outputs for this batch:
|
|
bst_1[batch] = st_1; ..., bst_n[batch] = st_n;
|
|
// Note scan-outputs will have size max_sequence_length, but only first N values will be meaningful.
|
|
// The remaining values have an undefined value.
|
|
b_scan_out_1[batch] = scan_out_1; ...; b_scan_out_k[batch] = scan_out_k;
|
|
}
|
|
return bst_1, ..., bst_n, b_scan_out_1, ..., b_scan_out_k;
|
|
|
|
|
|
|
|
*Sample usage: Encoding RNN using a Scan*
|
|
|
|
The following example shows how a simple RNN over an input tensor %X, with weight tensor %Wi,
|
|
recurrence weight tensor %Ri, bias tensors %Wbi and %Rbi, and initial hidden-state %H_0 can
|
|
be encoded as a ScanLoop. Note that the loop-body is a nested graph, and it directly computes
|
|
%Wi, %Ri, %Wbi, and %Rbi (typically constants or initializers in the body graph). If these
|
|
values are computed in the outer graph, they need to be passed in as extra state_variables.
|
|
|
|
graph rnn-encoding {
|
|
%H_0 = ...
|
|
%X = ...
|
|
%Y_h, %Y = Scan[body = <graph rnn-cell-1>, num_scan_inputs=1]("", %H_0, %X)
|
|
return %Y, %Y_h
|
|
}
|
|
|
|
graph rnn-cell-1 (
|
|
%H_tminus1[FLOAT, tensor]
|
|
%X_t[FLOAT, tensor]
|
|
) {
|
|
%Wi = ...
|
|
%Ri = ...
|
|
%Wbi = ...
|
|
%Rbi = ...
|
|
%t1 = X_t * (Wi^T)
|
|
%t2 = H_tminus1*(Ri^T)
|
|
%t3 = Add(%t1, %t2)
|
|
%t4 = Add(%t3, %Wbi)
|
|
%t5 = Add(%t4, %Rbi)
|
|
%Ht = Tanh(%t5)
|
|
%Accumulate = Identity(%Ht)
|
|
return %Ht, %Accumulate
|
|
}
|
|
|
|
)DOC";
|
|
|
|
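// Illustrative note for the batched opset-8 form above (values are made up): with
// batch_size = 2, max_sequence_length = 5 and sequence_lens = [5, 3], the body runs 5
// iterations for batch entry 0 and 3 for batch entry 1, and the scan_outputs for entry 1
// are padded out to length 5, with the trailing values left undefined.
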
ONNX_OPERATOR_SET_SCHEMA(
    Scan,
    8,
    OpSchema()
        .SetDoc(scan_opset8_doc)
        .Input(
            0,
            "sequence_lens",
            "Optional tensor specifying lengths of the sequences in a batch. "
            "If this input is not specified, all sequences are assumed to be of "
            "the maximum sequence length (the dimension of the sequence axis of "
            "the scan_input tensors).",
            "I",
            OpSchema::Optional)
        .Input(
            1,
            "initial_state_and_scan_inputs",
            "Initial values of the loop's N state variables followed by M scan_inputs",
            "V",
            OpSchema::Variadic,
            false)
        .Output(
            0,
            "final_state_and_scan_outputs",
            "Final values of the loop's N state variables followed by K scan_outputs",
            "V",
            OpSchema::Variadic,
            false)
        .Attr(
            "body",
            "The graph run each iteration. It has N+M inputs: "
            "(loop state variables..., scan_input_elts...). It has N+K outputs: "
            "(loop state variables..., scan_output_elts...). Each "
            "scan_output is created by concatenating the value of the specified "
            "scan_output_elt value at the end of each iteration of the loop. It is an error"
            " if the dimensions of these values change across loop iterations.",
            AttributeProto::GRAPH,
            true)
        .Attr("num_scan_inputs", "An attribute specifying the number of scan_inputs M. ", AttributeProto::INT, true)
        .Attr(
            "directions",
            "An optional list of M flags. The i-th element of the list specifies the direction "
            "to be scanned for the i-th scan_input tensor: 0 indicates forward direction and 1 "
            "indicates reverse direction. "
            "If omitted, all scan_input tensors will be scanned in the forward direction.",
            AttributeProto::INTS,
            false)
        .TypeConstraint("I", {"tensor(int64)"}, "Int64 tensor")
        .TypeConstraint("V", OpSchema::all_tensor_types(), "All Tensor types")
        .TypeAndShapeInferenceFunction(ScanInferenceFunctionOpset8));

void LoopInferenceFunctionOpset8(InferenceContext& ctx) {
  auto num_inputs = ctx.getNumInputs();
  auto num_loop_state_vars = num_inputs - 2; // skip 'M' and 'cond'

  std::vector<const TypeProto*> subgraph_input_types;

  std::vector<TypeProto> temporary_type_protos;
  temporary_type_protos.reserve(num_inputs - 2);

  // create TypeProto to validate iteration number type is the same as the
  // optional 'M' input for max iterations.
  TypeProto iter_num_type;
  iter_num_type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT64);
  subgraph_input_types.push_back(&iter_num_type);

  // 'cond'
  subgraph_input_types.push_back(ctx.getInputType(1));

  // loop state value types get propagated to outputs, but shape may change
  // across iterations so don't propagate it to the outputs and don't pass it
  // into the subgraph inferencing
  for (size_t i = 2; i < num_inputs; ++i) {
    propagateElemTypeFromInputToOutput(ctx, i, i - 2);

    // copy so we can remove the shape before passing to the subgraph
    // inferencing
    temporary_type_protos.push_back(*ctx.getInputType(i));
    auto& input_type = temporary_type_protos.back();
    input_type.mutable_tensor_type()->clear_shape();

    subgraph_input_types.push_back(&input_type);
  }

  // Run inferencing on the subgraph
  std::vector<const TypeProto*> subgraph_output_types;

  GraphInferencer* graphInferencer = ctx.getGraphAttributeInferencer("body");
  if (graphInferencer) {
    std::vector<const TensorProto*> input_data;
    input_data.push_back(nullptr); // iteration number
    for (size_t i = 1; i < num_inputs; ++i) {
      input_data.push_back(ctx.getInputData(i));
    }

    subgraph_output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
  }

  // if empty(), assume inferencing was skipped
  if (!subgraph_output_types.empty()) {
    auto num_outputs = ctx.getNumOutputs();

    // subgraph outputs the condition value first but that is only used
    // internally and not returned by Loop.
    if (subgraph_output_types.size() != num_outputs + 1) {
      fail_type_inference(
          "Graph attribute inferencing returned type information for ",
          subgraph_output_types.size(),
          " outputs. Expected ",
          num_outputs + 1);
    }

    // check loop state values match. we should already have type/shape info
    for (size_t i = 0; i < num_outputs; ++i) {
      auto* subgraph_output_type = subgraph_output_types[i + 1]; // skip 'cond'
      auto* loop_output_type = ctx.getOutputType(i);

      const bool is_loop_state_var = i < num_loop_state_vars;

      if (!subgraph_output_type->has_tensor_type()) {
        fail_type_inference(
            "Loop 'body' subgraph outputs should all be tensors but output ",
            i,
            " was ",
            subgraph_output_type->value_case());
      }

      // if there's an existing type check it matches. otherwise propagate
      propagateElemTypeWithValidation(subgraph_output_type, loop_output_type);

      if (is_loop_state_var) {
        // shape may change across iterations so ignore.
      } else {
        // propagate shape
        if (subgraph_output_type->tensor_type().has_shape()) {
          // per iteration output. first dimension will be number of iterations
          // but we don't know that value yet
          TypeProto inferred_type(*subgraph_output_type);
          auto* mutable_inferred_tensor_type = inferred_type.mutable_tensor_type();
          auto* mutable_inferred_shape = mutable_inferred_tensor_type->mutable_shape();

          mutable_inferred_shape->clear_dim();

          // add empty dimension for number of iterations
          mutable_inferred_shape->add_dim();

          // add dimensions from subgraph output shape
          for (const auto& dim : subgraph_output_type->tensor_type().shape().dim()) {
            (*mutable_inferred_shape->add_dim()) = dim;
          }

          mergeInShapeInfo(*mutable_inferred_tensor_type, *loop_output_type->mutable_tensor_type());
        }
      }
    }
  }
}

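// Worked example for the scan-output shape handling above (shapes are illustrative only):
// if the body produces a per-iteration scan output of shape [c], the corresponding Loop
// output is inferred as [?, c], where the leading dimension (the number of iterations) is
// left unknown at inference time.
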
static const char* Loop_ver1_doc = R"DOC(
|
|
Generic Looping construct. This loop has multiple termination conditions:
|
|
|
|
1) Trip count. Iteration count specified at runtime. Set by
|
|
specifying the input M. Optional. Set to empty string to omit.
|
|
Note that a static trip count (specified at graph construction time) can be
|
|
specified by passing in a constant node for input M.
|
|
2) Loop termination condition. This is an input to the op that determines
|
|
whether to run the first iteration and also a loop-carried dependency for
|
|
the body graph. The body graph must yield a value for the condition variable,
|
|
whether this input is provided or not.
|
|
|
|
This table summarizes the operating modes of this operator with equivalent
|
|
C-style code:
|
|
|
|
Operator inputs defined as (max_trip_count, condition_var).
|
|
|
|
input ("", ""):
|
|
for (int i=0; ; ++i) {
|
|
cond = ... // Note this value is ignored, but is required in the body
|
|
}
|
|
|
|
input ("", cond) // Note this is analogous to a while loop
|
|
bool cond = ...;
|
|
for (int i=0; cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
input ("", 1) // Note this is analogous to a do-while loop
|
|
bool cond = true
|
|
for (int i=0; cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
input (trip_count, "") // Note this is analogous to a for loop
|
|
int trip_count = ...
|
|
for (int i=0; i < trip_count; ++i) {
|
|
cond = ...; // ignored
|
|
}
|
|
|
|
input (trip_count, cond)
|
|
int trip_count = ...;
|
|
bool cond = ...;
|
|
for (int i=0; i < trip_count && cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
|
|
*Sample usage - cond as well as trip count*
|
|
|
|
graph predict-net {
|
|
%a = Constant[value = <Scalar Tensor [3]>]()
|
|
%b = Constant[value = <Scalar Tensor [6]>]()
|
|
%keepgoing = Constant[value = <Scalar Tensor [1]>]()
|
|
%max_trip_count = Constant[value = <Scalar Tensor [10]>]()
|
|
%keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)
|
|
return
|
|
}
|
|
|
|
graph body-net (
|
|
%i[INT32, scalar]
|
|
%keepgoing[BOOL, scalar]
|
|
%b[INT32, scalar]
|
|
) {
|
|
%my_local = Add(%a, %b)
|
|
%b_out = Sub(%a, %b)
|
|
%keepgoing_out = Greater(%my_local, %b_out)
|
|
%user_defined_vals = Add(%b, %b)
|
|
return %keepgoing_out, %b_out, %user_defined_vals
|
|
}
|
|
|
|
*Sample equivalent C code*
|
|
|
|
{
|
|
/* User-defined code (enclosing scope) */
|
|
int a = 3, b = 6;
|
|
bool keepgoing = true; // Analogous to input cond
|
|
/* End user-defined code */
|
|
|
|
/* Implicitly-defined code */
|
|
const int max_trip_count = 10; // Analogous to input M
|
|
int user_defined_vals[]; // Imagine this is resizable
|
|
/* End implicitly-defined code */
|
|
for (int i=0; i < max_trip_count && keepgoing; ++i) {
|
|
/* User-defined code (loop body) */
|
|
int my_local = a + b; // Reading values in the enclosing scope is fine
|
|
b = a - b; // writes fine if we specify b as a loop-carried dependency
|
|
keepgoing = my_local > b; // keepgoing is a loop-carried dependency
|
|
user_defined_vals[i] = b + b;
|
|
/* End user-defined code */
|
|
}
|
|
// my_local = 123; // Can't do this. my_local was defined in the body
|
|
|
|
// These below values are live-out from the loop and therefore accessible
|
|
b_out; user_defined_vals; keepgoing_out;
|
|
}
|
|
|
|
There are several things of note in this code snippet:
|
|
|
|
1) Values from the enclosing scope (i.e. variable a here) are in scope and can
|
|
be referenced in the inputs of the loop.
|
|
2) Any variables which you wish to make available in the enclosing scope (i.e.
|
|
the variables b and keepgoing) must be declared as either loop-carried
|
|
dependencies (both at the op inputs and output and at the body net input and
|
|
output) or scan_outputs.
|
|
3) Values created in the body cannot be accessed in the enclosing scope.
|
|
|
|
Note that the semantics of this op support "diagonal" or "wavefront" execution.
|
|
(See Step 3 here for an example:
|
|
https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/).
|
|
Frontends should emit multi-layer RNNs as a series of While operators (with
|
|
time being the inner looping dimension), with each successive layer consuming
|
|
the scan_outputs from the previous layer, possibly going through several
|
|
point-wise operators (e.g. dropout, residual connections, linear layer).
|
|
)DOC";
|
|
|
|
ONNX_OPERATOR_SET_SCHEMA(
    Loop,
    1,
    OpSchema()
        .SetDoc(Loop_ver1_doc)
        .Input(
            0,
            "M",
            "A maximum trip-count for the loop specified at runtime. Optional."
            " Pass empty string to skip.",
            "I",
            OpSchema::Optional)
        .Input(
            1,
            "cond",
            "A boolean termination condition. Optional. Pass empty string to skip.",
            "B",
            OpSchema::Optional)
        .Input(
            2,
            "v_initial",
            "The initial values of any loop-carried dependencies (values that "
            "change across loop iterations)",
            "V",
            OpSchema::Variadic,
            false)
        .Output(
            0,
            "v_final_and_scan_outputs",
            "Final N loop carried dependency values then K scan_outputs",
            "V",
            OpSchema::Variadic,
            false)
        .Attr(
            "body",
            "The graph run each iteration. It has 2+N inputs: (iteration_num, "
            "condition, loop carried dependencies...). It has 1+N+K outputs: "
            "(condition, loop carried dependencies..., scan_outputs...). Each "
            "scan_output is created by concatenating the value of the specified "
            "output value at the end of each iteration of the loop. It is an error"
            " if the dimensions or data type of these scan_outputs change across loop"
            " iterations.",
            AttributeProto::GRAPH)
        .TypeConstraint("V", OpSchema::all_tensor_types(), "All Tensor types")
        .TypeConstraint("I", {"tensor(int64)"}, "tensor of int64, which should be a scalar.")
        .TypeConstraint("B", {"tensor(bool)"}, "tensor of bool, which should be a scalar.")
        .TypeAndShapeInferenceFunction(LoopInferenceFunctionOpset8));

void LoopInferenceFunctionOpset11(InferenceContext& ctx) {
|
|
auto num_inputs = ctx.getNumInputs();
|
|
auto num_loop_state_vars = num_inputs - 2; // skip 'M' and 'cond'
|
|
|
|
std::vector<const TypeProto*> subgraph_input_types;
|
|
|
|
std::vector<TypeProto> temporary_type_protos;
|
|
temporary_type_protos.reserve(num_inputs - 2);
|
|
|
|
// create TypeProto to validate iteration number type is the same as the
|
|
// optional 'M' input for max iterations.
|
|
TypeProto iter_num_type;
|
|
iter_num_type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT64);
|
|
subgraph_input_types.push_back(&iter_num_type);
|
|
|
|
// 'cond'
|
|
subgraph_input_types.push_back(ctx.getInputType(1));
|
|
|
|
// loop state value types get propagated to outputs, but shape may change
|
|
// across iterations so don't propagate it to the outputs and don't pass it
|
|
// into the subgraph inferencing
|
|
for (size_t i = 2; i < num_inputs; ++i) {
|
|
propagateElemTypeFromInputToOutput(ctx, i, i - 2);
|
|
|
|
// copy so we can remove the shape before passing to the subgraph
|
|
// inferencing
|
|
temporary_type_protos.push_back(*ctx.getInputType(i));
|
|
auto& input_type = temporary_type_protos.back();
|
|
input_type.mutable_tensor_type()->clear_shape();
|
|
|
|
subgraph_input_types.push_back(&input_type);
|
|
}
|
|
|
|
// Run inferencing on the subgraph
|
|
std::vector<const TypeProto*> subgraph_output_types;
|
|
|
|
GraphInferencer* graphInferencer = ctx.getGraphAttributeInferencer("body");
|
|
if (graphInferencer) {
|
|
std::vector<const TensorProto*> input_data;
|
|
input_data.push_back(nullptr); // iteration number
|
|
for (size_t i = 1; i < num_inputs; ++i) {
|
|
input_data.push_back(ctx.getInputData(i));
|
|
}
|
|
|
|
subgraph_output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
|
|
}
|
|
|
|
// if empty(), assume inferencing was skipped
|
|
if (!subgraph_output_types.empty()) {
|
|
auto num_outputs = ctx.getNumOutputs();
|
|
|
|
// subgraph outputs the condition value first but that is only used
|
|
// internally and not returned by Loop.
|
|
if (subgraph_output_types.size() != num_outputs + 1) {
|
|
fail_type_inference(
|
|
"Graph attribute inferencing returned type information for ",
|
|
subgraph_output_types.size(),
|
|
" outputs. Expected ",
|
|
num_outputs + 1);
|
|
}
|
|
|
|
// check loop state values match. we should already have type/shape info
|
|
for (size_t i = 0; i < num_outputs; ++i) {
|
|
auto* subgraph_output_type = subgraph_output_types[i + 1]; // skip 'cond'
|
|
auto* loop_output_type = ctx.getOutputType(i);
|
|
|
|
const bool is_loop_state_var = i < num_loop_state_vars;
|
|
|
|
if (!subgraph_output_type->has_tensor_type()) {
|
|
fail_type_inference(
|
|
"Loop 'body' subgraph outputs should all be tensors but output ",
|
|
i,
|
|
" was ",
|
|
subgraph_output_type->value_case());
|
|
}
|
|
|
|
// if there's an existing type check it matches. otherwise propagate
|
|
propagateElemTypeWithValidation(subgraph_output_type, loop_output_type);
|
|
|
|
if (is_loop_state_var) {
|
|
// shape may change across iterations so ignore.
|
|
} else {
|
|
// propagate shape
|
|
if (subgraph_output_type->tensor_type().has_shape()) {
|
|
// per iteration output. first dimension will be number of iterations
|
|
// but we don't know that value yet
|
|
TypeProto inferred_type(*subgraph_output_type);
|
|
auto* mutable_inferred_tensor_type = inferred_type.mutable_tensor_type();
|
|
auto* mutable_inferred_shape = mutable_inferred_tensor_type->mutable_shape();
|
|
|
|
mutable_inferred_shape->clear_dim();
|
|
|
|
// add empty dimension for number of iterations
|
|
mutable_inferred_shape->add_dim();
|
|
|
|
// add dimensions from subgraph output shape
|
|
for (const auto& dim : subgraph_output_type->tensor_type().shape().dim()) {
|
|
(*mutable_inferred_shape->add_dim()) = dim;
|
|
}
|
|
|
|
mergeInShapeInfo(*mutable_inferred_tensor_type, *loop_output_type->mutable_tensor_type());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
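// Illustrative sketch only (not used by the schemas in this file): the shape
// transformation applied above to scan outputs turns a per-iteration output of
// shape [C] into a Loop output of shape [num_iterations, C], with the iteration
// dimension left unknown at inference time. Assuming the generated protobuf API
// for TypeProto/TensorShapeProto, the equivalent steps are:
//
//   TypeProto per_iteration;
//   per_iteration.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT);
//   per_iteration.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(4);  // shape [4]
//
//   TypeProto scan_output(per_iteration);
//   auto* out_shape = scan_output.mutable_tensor_type()->mutable_shape();
//   out_shape->clear_dim();
//   out_shape->add_dim();  // unknown number of iterations
//   for (const auto& d : per_iteration.tensor_type().shape().dim()) {
//     *out_shape->add_dim() = d;  // resulting shape: [?, 4]
//   }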
|
|
|
|
static const char* Loop_ver11_doc = R"DOC(
|
|
Generic Looping construct. This loop has multiple termination conditions:
|
|
|
|
1) Trip count. Iteration count specified at runtime. Set by
|
|
specifying the input M. Optional. Set to empty string to omit.
|
|
Note that a static trip count (specified at graph construction time) can be
|
|
specified by passing in a constant node for input M.
|
|
2) Loop termination condition. This is an input to the op that determines
|
|
whether to run the first iteration and also a loop-carried dependency for
|
|
the body graph. The body graph must yield a value for the condition variable,
|
|
whether this input is provided or not.
|
|
|
|
This table summarizes the operating modes of this operator with equivalent
|
|
C-style code:
|
|
|
|
Operator inputs defined as (max_trip_count, condition_var).
|
|
|
|
input ("", ""):
|
|
for (int i=0; ; ++i) {
|
|
cond = ... // Note this value is ignored, but is required in the body
|
|
}
|
|
|
|
input ("", cond) // Note this is analogous to a while loop
|
|
bool cond = ...;
|
|
for (int i=0; cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
input ("", 1) // Note this is analogous to a do-while loop
|
|
bool cond = true
|
|
for (int i=0; cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
input (trip_count, "") // Note this is analogous to a for loop
|
|
int trip_count = ...
|
|
for (int i=0; i < trip_count; ++i) {
|
|
cond = ...; // ignored
|
|
}
|
|
|
|
input (trip_count, cond)
|
|
int trip_count = ...;
|
|
bool cond = ...;
|
|
for (int i=0; i < trip_count && cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
|
|
*Sample usage - cond as well as trip count*
|
|
|
|
graph predict-net {
|
|
%a = Constant[value = <Scalar Tensor [3]>]()
|
|
%b = Constant[value = <Scalar Tensor [6]>]()
|
|
%keepgoing = Constant[value = <Scalar Tensor [1]>]()
|
|
%max_trip_count = Constant[value = <Scalar Tensor [10]>]()
|
|
%keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)
|
|
return
|
|
}
|
|
|
|
graph body-net (
|
|
%i[INT64, scalar] // iteration number
|
|
%keepgoing_in[BOOL, scalar] // incoming loop-termination-condition; not used
|
|
%b_in[INT32, scalar] // incoming value of loop-carried-dependency b
|
|
) {
|
|
%my_local = Add(%a, %b_in)
|
|
%b_out = Sub(%a, %b_in) // outgoing value of loop-carried-dependency b
|
|
%keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-termination-condition
|
|
%user_defined_val = Add(%b_in, %b_in) // scan-output value to be accumulated
|
|
return %keepgoing_out, %b_out, %user_defined_val
|
|
}
|
|
|
|
*Sample equivalent C code*
|
|
|
|
{
|
|
/* User-defined code (enclosing scope) */
|
|
int a = 3, b = 6;
|
|
bool keepgoing = true; // Analogous to input cond
|
|
/* End user-defined code */
|
|
|
|
/* Implicitly-defined code */
|
|
const int max_trip_count = 10; // Analogous to input M
|
|
int user_defined_vals[]; // Imagine this is resizable
|
|
/* End implicitly-defined code */
|
|
/* initialize loop-carried variables and scan-output variables */
|
|
bool keepgoing_out = keepgoing;
int b_out = b;
|
|
|
|
for (int i=0; i < max_trip_count && keepgoing_out; ++i) {
|
|
/* Implicitly-defined code: bind actual parameter values
|
|
to formal parameter variables of loop-body */
|
|
bool keepgoing_in = keepgoing_out;
|
|
int b_in = b_out;
|
|
|
|
/* User-defined code (loop body) */
|
|
int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine
|
|
b_out = a - b_in;
|
|
keepgoing_out = my_local > b_out;
|
|
int user_defined_val = b_in + b_in; // b_in and b_out are different variables
|
|
/* End user-defined code */
|
|
|
|
/* Implicitly defined-code */
|
|
user_defined_vals[i] = user_defined_val; // accumulate scan-output values
|
|
}
|
|
// int t = my_local; // Can't do this. my_local is not accessible here.
|
|
|
|
// The values below are bound to the output variables of the loop and therefore accessible
|
|
// b_out; user_defined_vals; keepgoing_out;
|
|
}
|
|
|
|
There are several things of note in this code snippet:
|
|
|
|
1) Values from the enclosing scope (i.e. variable "a" here) are in scope and can
|
|
be referenced in the inputs of the loop.
|
|
2) Any values computed in the loop body that need to be used in a subsequent
|
|
iteration or after the loop are modelled using a pair of variables in the loop-body,
|
|
consisting of an input variable (e.g., b_in) and an output variable (e.g., b_out).
|
|
These are referred to as loop-carried dependences. The loop operation node
|
|
supplies the input value of the input variable for the first iteration, and
|
|
returns the output value of the output variable produced by the final
|
|
iteration.
|
|
3) Scan_output variables are used to implicitly concatenate values computed across
|
|
all the iterations. In the above example, the values of user_defined_val computed
|
|
over all iterations are concatenated and returned as the value of user_defined_vals
|
|
after the loop.
|
|
4) Values created in the body cannot be accessed in the enclosing scope,
|
|
except using the mechanism described above.
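
As an illustration (not part of the normative specification), reusing the names from the
sample above, the while-loop form from the table simply passes an empty name for the
maximum trip count:

    Loop[body = <graph body-net>]("", %keepgoing, %b)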
|
|
|
|
Note that the semantics of this op support "diagonal" or "wavefront" execution.
|
|
(See Step 3 here for an example:
|
|
https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/).
|
|
Frontends should emit multi-layer RNNs as a series of Loop operators (with
|
|
time being the inner looping dimension), with each successive layer consuming
|
|
the scan_outputs from the previous layer, possibly going through several
|
|
point-wise operators (e.g. dropout, residual connections, linear layer).
|
|
)DOC";
|
|
|
|
ONNX_OPERATOR_SET_SCHEMA(
|
|
Loop,
|
|
11,
|
|
OpSchema()
|
|
.SetDoc(Loop_ver11_doc)
|
|
.Input(
|
|
0,
|
|
"M",
|
|
"A maximum trip-count for the loop specified at runtime. Optional."
|
|
" Pass empty string to skip.",
|
|
"I",
|
|
OpSchema::Optional)
|
|
.Input(
|
|
1,
|
|
"cond",
|
|
"A boolean termination condition. Optional. Pass empty string to skip.",
|
|
"B",
|
|
OpSchema::Optional)
|
|
.Input(
|
|
2,
|
|
"v_initial",
|
|
"The initial values of any loop-carried dependencies (values that "
|
|
"change across loop iterations)",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false,
|
|
0)
|
|
.Output(
|
|
0,
|
|
"v_final_and_scan_outputs",
|
|
"Final N loop carried dependency values then K scan_outputs",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false)
|
|
.Attr(
|
|
"body",
|
|
"The graph run each iteration. It has 2+N inputs: (iteration_num, "
|
|
"condition, loop carried dependencies...). It has 1+N+K outputs: "
|
|
"(condition, loop carried dependencies..., scan_outputs...). Each "
|
|
"scan_output is created by concatenating the value of the specified "
|
|
"output value at the end of each iteration of the loop. It is an error"
|
|
" if the dimensions or data type of these scan_outputs change across loop"
|
|
" iterations.",
|
|
AttributeProto::GRAPH)
|
|
.TypeConstraint("V", OpSchema::all_tensor_types(), "All Tensor types")
|
|
.TypeConstraint("I", {"tensor(int64)"}, "tensor of int64, which should be a scalar.")
|
|
.TypeConstraint("B", {"tensor(bool)"}, "tensor of bool, which should be a scalar.")
|
|
.TypeAndShapeInferenceFunction(LoopInferenceFunctionOpset11));
|
|
|
|
static const char* scan_9_doc = R"DOC(
|
|
Scan can be used to iterate over one or more scan_input tensors,
|
|
constructing zero or more scan_output tensors. It combines ideas from general recurrences,
|
|
functional programming constructs such as scan, fold, map, and zip, and is intended to enable
|
|
generalizations of RNN-like constructs for sequence-to-sequence processing.
|
|
Other tensors (referred to as state_variables here) can be used to carry a state
|
|
when iterating from one element to another (similar to hidden-state in RNNs, also referred
|
|
to as loop-carried dependences in the context of loops).
|
|
Many common usages involve a single scan_input tensor (where functionality
|
|
similar to scan, fold and map can be obtained). When more than one scan_input is used,
|
|
a behavior similar to zip is obtained.
|
|
|
|
The attribute body must be a graph, specifying the computation to be performed in
|
|
every iteration. It takes as input the current values of the state_variables and
|
|
the current iterated element of the scan_inputs. It must return the (updated) values
|
|
of the state_variables and zero or more scan_output_element tensors. The values of the
|
|
scan_output_element tensors are concatenated over all the iterations to produce the
|
|
scan_output values of the scan construct (similar to the concatenated intermediate
|
|
hidden-state values of RNN-like constructs). All the output tensors (state_variables as
|
|
well as scan_output_element tensors) are required to have the same shape in each iteration
|
|
of the loop (a restriction imposed to enable efficient memory allocation).
|
|
|
|
Note that the iterated element passed to the body subgraph does not have a sequence
|
|
axis. It will have a rank one less than the rank of the corresponding scan_input.
|
|
|
|
The scan operation returns the final values of the state_variables as well as the
|
|
scan_outputs.
|
|
|
|
The optional attribute scan_input_directions specifies the direction (forward or backward)
|
|
for each scan input. If this attribute is omitted, all sequences are scanned in the forward
|
|
direction. A bidirectional scan may be performed by specifying the same tensor input twice
|
|
in the scan_inputs, once with a forward direction, and once with a backward direction.
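
For example (an illustrative sketch only; the graph and tensor names here are hypothetical),
a bidirectional scan over a single sequence %X with separate forward and backward states
can be written by passing %X twice:

    %H_f, %H_b, %Y_f, %Y_b = Scan[body = <graph bi-cell>, num_scan_inputs = 2,
                                  scan_input_directions = [0, 1]](%H_0_f, %H_0_b, %X, %X)

where bi-cell takes two state variables and two scan-input elements and returns the two
updated states followed by two scan-output elements.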
|
|
|
|
The scan_output of the operation is produced by concatenating the scan_output_element
|
|
values produced by the body in each iteration. The optional attribute scan_output_directions
|
|
specifies the direction in which scan_output is constructed (by appending or prepending the
|
|
scan_output_element to scan_output in each iteration) for each scan_output. If this attribute
|
|
is omitted, the scan_output_element is appended to the scan_output in each iteration.
|
|
|
|
The optional attribute scan_input_axes specifies the axis to be scanned for each scan_input.
|
|
If omitted, every scan_input will be scanned in axis 0. For example, if axis 0 is the
|
|
batch axis and axis 1 is the time axis (to be scanned), specify an axis value of 1.
|
|
Note that scanning a non-zero axis may be less efficient than scanning axis zero.
|
|
|
|
The optional attribute scan_output_axes specifies the axis along which the scan_outputs
|
|
are accumulated for each scan_output. For example, if axis 1 is the time axis (to be
|
|
scanned) for both inputs and outputs, specify a scan_input axis and scan_output axis
|
|
value of 1.
|
|
|
|
Note that because of the ONNX restriction that only the last parameter of an operator can
|
|
be variadic, the initial-states and scan-inputs are listed together as one input parameter.
|
|
Similarly, the final-states and scan-outputs are listed together as one output parameter.
|
|
The attribute num_scan_inputs indicates the number M of scan-inputs.
|
|
|
|
The behavior of
|
|
|
|
Scan <
|
|
num_scan_inputs = m,
|
|
body = loop-body,
|
|
scan_input_axes = [axis_1, ..., axis_m]
|
|
> (init_1, ..., init_n, scan_1, ..., scan_m)
|
|
|
|
is equivalent to the following pseudo-code:
|
|
|
|
// scan_i.shape[axis_i] denotes the (max) sequence-length of scan_i
|
|
// scan_i.shape[axis_i] is required to be equal to scan_j.shape[axis_j] for all i,j.
|
|
sequence_length = scan_1.shape[axis_1];
|
|
|
|
// initialize state-variables
|
|
st_1 = init_1; ... st_n = init_n;
|
|
// initialize scan-output variables: [] denotes an empty tensor
|
|
scan_out_1 = []; ...; scan_out_k = [];
|
|
// identify number of iterations:
|
|
|
|
// execute loop
|
|
for (int t = 0; t < sequence_length; ++t) {
|
|
// generate the scan-input elements: the notation T<axis=k>[t] indicates the sub-tensor
|
|
// of rank one less than T obtained by indexing T at position t along axis k.
|
|
si_1 = scan_1<axis=axis_1>[t];
|
|
... ;
|
|
si_m = scan_m<axis=axis_m>[t];
|
|
// execute loop-body
|
|
st_1, ..., st_n, so_1, ..., so_k = loop-body(st_1, ..., st_n, si_1, ..., si_m)
|
|
// accumulate the scan-output elements
|
|
scan_out_1 = Concat<axis=0>(scan_out_1, so_1); ... ; scan_out_k = Concat<axis=0>(scan_out_k, so_k);
|
|
}
|
|
|
|
return st_1, ..., st_n, scan_out_1, ..., scan_out_k;
|
|
|
|
*Sample usage: Encoding RNN using a Scan*
|
|
|
|
The following example shows how a simple RNN over an input tensor %X, with weight tensor %Wi,
|
|
recurrence weight tensor %Ri, bias tensors %Wbi and %Rbi, and initial hidden-state %H_0 can
|
|
be encoded as a Scan operator. Note that the loop-body is a nested graph, and it directly computes
|
|
%Wi, %Ri, %Wbi, and %Rbi (typically constants or initializers in the body graph). If these
|
|
values are computed in the outer graph, they need to be passed in as extra state_variables.
|
|
|
|
graph rnn-encoding {
|
|
%H_0 = ...
|
|
%X = ...
|
|
%Y_h, %Y = Scan[body = <graph rnn-cell-1>, num_scan_inputs=1](%H_0, %X)
|
|
return %Y, %Y_h
|
|
}
|
|
|
|
graph rnn-cell-1 (
|
|
%H_tminus1[FLOAT, tensor]
|
|
%X_t[FLOAT, tensor]
|
|
) {
|
|
%Wi = ...
|
|
%Ri = ...
|
|
%Wbi = ...
|
|
%Rbi = ...
|
|
%t1 = X_t * (Wi^T)
|
|
%t2 = H_tminus1*(Ri^T)
|
|
%t3 = Add(%t1, %t2)
|
|
%t4 = Add(%t3, %Wbi)
|
|
%t5 = Add(%t4, %Rbi)
|
|
%Ht = Tanh(%t5)
|
|
%Accumulate = Identity(%Ht)
|
|
return %Ht, %Accumulate
|
|
}
|
|
|
|
)DOC";
|
|
|
|
ONNX_OPERATOR_SET_SCHEMA(
|
|
Scan,
|
|
9,
|
|
OpSchema()
|
|
.SetDoc(scan_9_doc)
|
|
.Input(
|
|
0,
|
|
"initial_state_and_scan_inputs",
|
|
"Initial values of the loop's N state variables followed by M scan_inputs",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false)
|
|
.Output(
|
|
0,
|
|
"final_state_and_scan_outputs",
|
|
"Final values of the loop's N state variables followed by K scan_outputs",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false)
|
|
.Attr(
|
|
"body",
|
|
"The graph run each iteration. It has N+M inputs: "
|
|
"(loop state variables..., scan_input_elts...). It has N+K outputs: "
|
|
"(loop state variables..., scan_output_elts...). Each "
|
|
"scan_output is created by concatenating the value of the specified "
|
|
"scan_output_elt value at the end of each iteration of the loop. It is an error"
|
|
" if the dimensions of these values change across loop iterations.",
|
|
AttributeProto::GRAPH,
|
|
true)
|
|
.Attr("num_scan_inputs", "An attribute specifying the number of scan_inputs M. ", AttributeProto::INT, true)
|
|
.Attr(
|
|
"scan_input_directions",
|
|
"An optional list of M flags. The i-th element of the list specifies the direction "
|
|
"to be scanned for the i-th scan_input tensor: 0 indicates forward direction and 1 "
|
|
"indicates reverse direction. "
|
|
"If omitted, all scan_input tensors will be scanned in the forward direction.",
|
|
AttributeProto::INTS,
|
|
false)
|
|
.Attr(
|
|
"scan_output_directions",
|
|
"An optional list of K flags, one for each scan_output. The i-th element of the list "
|
|
"specifies whether the i-th scan_output should be constructed by appending or "
|
|
"prepending a new value in each iteration: 0 indicates appending and 1 "
|
|
"indicates prepending. "
|
|
"If omitted, all scan_output tensors will be produced by appending a value "
|
|
"in each iteration.",
|
|
AttributeProto::INTS,
|
|
false)
|
|
.Attr(
|
|
"scan_input_axes",
|
|
"An optional list of M flags. The i-th element of the list specifies the axis "
|
|
"to be scanned (the sequence axis) for the i-th scan_input. If omitted, 0 will "
|
|
"be used as the scan axis for every scan_input.",
|
|
AttributeProto::INTS,
|
|
false)
|
|
.Attr(
|
|
"scan_output_axes",
|
|
"An optional list of K flags. The i-th element of the list specifies the axis "
|
|
"for the i-th scan_output. The scan outputs are accumulated along the specified "
|
|
"axis. If omitted, 0 will be used as the scan axis for every scan_output.",
|
|
AttributeProto::INTS,
|
|
false)
|
|
.TypeConstraint("V", OpSchema::all_tensor_types(), "All Tensor types")
|
|
.TypeAndShapeInferenceFunction(ScanInferenceFunctionOpset9));
|
|
|
|
void IfInferenceFunction1(InferenceContext& ctx) {
|
|
// there are no inputs so we just need to run the subgraph inferencing for
|
|
// then/else subgraphs and apply those to the outputs.
|
|
std::vector<const TypeProto*> subgraph_input_types; // none
|
|
std::vector<const TensorProto*> input_data; // none
|
|
|
|
std::vector<const TypeProto*> then_output_types;
|
|
std::vector<const TypeProto*> else_output_types;
|
|
|
|
// Run inferencing on the subgraph
|
|
GraphInferencer* graphInferencer = ctx.getGraphAttributeInferencer("then_branch");
|
|
if (graphInferencer) {
|
|
then_output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
|
|
}
|
|
|
|
graphInferencer = ctx.getGraphAttributeInferencer("else_branch");
|
|
if (graphInferencer) {
|
|
else_output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
|
|
}
|
|
|
|
auto num_outputs = ctx.getNumOutputs();
|
|
auto num_then_outputs = then_output_types.size();
|
|
auto num_else_outputs = else_output_types.size();
|
|
|
|
// the output types for then and else should be the same
|
|
if (num_then_outputs != num_else_outputs) {
|
|
fail_type_inference(
|
|
"then_branch and else_branch produce different number of outputs. ",
|
|
num_then_outputs,
|
|
" != ",
|
|
num_else_outputs);
|
|
}
|
|
|
|
if (num_then_outputs != num_outputs) {
|
|
fail_type_inference("If node has ", num_outputs, " but subgraphs produce ", num_then_outputs);
|
|
}
|
|
|
|
for (size_t i = 0, end = then_output_types.size(); i < end; ++i) {
|
|
auto then_output = then_output_types[i];
|
|
auto else_output = else_output_types[i];
|
|
|
|
if (then_output->value_case() != else_output->value_case()) {
|
|
fail_type_inference(
|
|
"Mismatched type for output ", i, " then=", then_output->value_case(), " else=", else_output->value_case());
|
|
}
|
|
|
|
auto* if_output = ctx.getOutputType(i);
|
|
*if_output = *then_output;
|
|
|
|
if (then_output->has_tensor_type()) {
|
|
auto then_elem_type = then_output->tensor_type().elem_type();
|
|
auto else_elem_type = else_output->tensor_type().elem_type();
|
|
|
|
if (then_elem_type != else_elem_type) {
|
|
fail_type_inference(
|
|
"Mismatched tensor element type for output ", i, " then=", then_elem_type, " else=", else_elem_type);
|
|
}
|
|
|
|
// merge the 'else' shape information to check it's consistent and
|
|
// augment the 'if' output if possible
|
|
mergeInShapeInfo(else_output->tensor_type(), *if_output->mutable_tensor_type());
|
|
}
|
|
}
|
|
}
|
|
|
|
ONNX_OPERATOR_SET_SCHEMA(
|
|
If,
|
|
1,
|
|
OpSchema()
|
|
.SetDoc("If conditional")
|
|
.Input(0, "cond", "Condition for the if. The tensor must contain a single element.", "B")
|
|
.Output(
|
|
0,
|
|
"outputs",
|
|
"Values that are live-out to the enclosing scope. The return values in "
|
|
"the `then_branch` and `else_branch` must be of the same shape and same "
|
|
"data type.",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false)
|
|
.Attr(
|
|
"then_branch",
|
|
"Graph to run if condition is true. Has N outputs: values you wish to "
|
|
"be live-out to the enclosing scope. The number of outputs must match"
|
|
" the number of outputs in the else_branch.",
|
|
AttributeProto::GRAPH)
|
|
.Attr(
|
|
"else_branch",
|
|
"Graph to run if condition is false. Has N outputs: values you wish to"
|
|
" be live-out to the enclosing scope. The number of outputs must match"
|
|
" the number of outputs in the then_branch.",
|
|
AttributeProto::GRAPH)
|
|
.TypeConstraint("V", OpSchema::all_tensor_types(), "All Tensor types")
|
|
.TypeConstraint("B", {"tensor(bool)"}, "Only bool")
|
|
.TypeAndShapeInferenceFunction(IfInferenceFunction1));
|
|
|
|
void IfInferenceFunction_11(InferenceContext& ctx) {
|
|
// there are no inputs so we just need to run the subgraph inferencing for
|
|
// then/else subgraphs and apply those to the outputs.
|
|
std::vector<const TypeProto*> subgraph_input_types; // none
|
|
std::vector<const TensorProto*> input_data; // none
|
|
|
|
std::vector<const TypeProto*> then_output_types;
|
|
std::vector<const TypeProto*> else_output_types;
|
|
|
|
// Run inferencing on the subgraph
|
|
GraphInferencer* graphInferencer = ctx.getGraphAttributeInferencer("then_branch");
|
|
if (graphInferencer) {
|
|
then_output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
|
|
}
|
|
|
|
graphInferencer = ctx.getGraphAttributeInferencer("else_branch");
|
|
if (graphInferencer) {
|
|
else_output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
|
|
}
|
|
|
|
auto num_outputs = ctx.getNumOutputs();
|
|
auto num_then_outputs = then_output_types.size();
|
|
auto num_else_outputs = else_output_types.size();
|
|
|
|
// the output types for then and else should be the same
|
|
if (num_then_outputs != num_else_outputs) {
|
|
fail_type_inference(
|
|
"then_branch and else_branch produce different number of outputs. ",
|
|
num_then_outputs,
|
|
" != ",
|
|
num_else_outputs);
|
|
}
|
|
|
|
if (num_then_outputs != num_outputs) {
|
|
fail_type_inference("If node has ", num_outputs, " but subgraphs produce ", num_then_outputs);
|
|
}
|
|
|
|
for (size_t i = 0, end = then_output_types.size(); i < end; ++i) {
|
|
auto then_output = then_output_types[i];
|
|
auto else_output = else_output_types[i];
|
|
|
|
if (then_output->value_case() != else_output->value_case()) {
|
|
fail_type_inference(
|
|
"Mismatched type for output ", i, " then=", then_output->value_case(), " else=", else_output->value_case());
|
|
}
|
|
|
|
auto* if_output = ctx.getOutputType(i);
|
|
*if_output = *then_output;
|
|
|
|
if (then_output->has_tensor_type()) {
|
|
auto then_elem_type = then_output->tensor_type().elem_type();
|
|
auto else_elem_type = else_output->tensor_type().elem_type();
|
|
|
|
if (then_elem_type != else_elem_type) {
|
|
fail_type_inference(
|
|
"Mismatched tensor element type for output ", i, " then=", then_elem_type, " else=", else_elem_type);
|
|
}
|
|
|
|
// merge the 'else' branch shape into the propagated 'then' shape so the
// 'If' output shape covers both possible branches
UnionShapeInfo(else_output->tensor_type().shape(), *if_output->mutable_tensor_type());
|
|
}
|
|
}
|
|
}
|
|
|
|
ONNX_OPERATOR_SET_SCHEMA(
|
|
If,
|
|
11,
|
|
OpSchema()
|
|
.SetDoc("If conditional")
|
|
.Input(0, "cond", "Condition for the if. The tensor must contain a single element.", "B")
|
|
.Output(
|
|
0,
|
|
"outputs",
|
|
"Values that are live-out to the enclosing scope. The return values in "
|
|
"the `then_branch` and `else_branch` must be of the same data type. "
|
|
"The `then_branch` and `else_branch` may produce tensors with the same "
|
|
"element type and different shapes. "
|
|
"If corresponding outputs from the then-branch and the else-branch have "
|
|
"static shapes S1 and S2, then the shape of the corresponding output "
|
|
"variable of the if-node (if present) must be compatible with both S1 "
|
|
"and S2 as it represents the union of both possible shapes."
|
|
"For example, if in a model file, the first "
|
|
"output of `then_branch` is typed float tensor with shape [2] and the "
|
|
"first output of `else_branch` is another float tensor with shape [3], "
|
|
"If's first output should have (a) no shape set, or (b) "
|
|
"a shape of rank 1 with neither `dim_value` nor `dim_param` set, or (c) "
|
|
"a shape of rank 1 with a unique `dim_param`. "
|
|
"In contrast, the first output cannot have the shape [2] since [2] and "
|
|
"[3] are not compatible.",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false)
|
|
.Attr(
|
|
"then_branch",
|
|
"Graph to run if condition is true. Has N outputs: values you wish to "
|
|
"be live-out to the enclosing scope. The number of outputs must match"
|
|
" the number of outputs in the else_branch.",
|
|
AttributeProto::GRAPH)
|
|
.Attr(
|
|
"else_branch",
|
|
"Graph to run if condition is false. Has N outputs: values you wish to"
|
|
" be live-out to the enclosing scope. The number of outputs must match"
|
|
" the number of outputs in the then_branch.",
|
|
AttributeProto::GRAPH)
|
|
.TypeConstraint("V", OpSchema::all_tensor_types(), "All Tensor types")
|
|
.TypeConstraint("B", {"tensor(bool)"}, "Only bool")
|
|
.TypeAndShapeInferenceFunction(IfInferenceFunction_11));
|
|
|
|
void IfInferenceFunction_13(InferenceContext& ctx) {
|
|
// there are no inputs so we just need to run the subgraph inferencing for
|
|
// then/else subgraphs and apply those to the outputs.
|
|
std::vector<const TypeProto*> subgraph_input_types; // none
|
|
std::vector<const TensorProto*> input_data; // none
|
|
|
|
std::vector<const TypeProto*> then_output_types;
|
|
std::vector<const TypeProto*> else_output_types;
|
|
|
|
// Run inferencing on the subgraph
|
|
GraphInferencer* graphInferencer = ctx.getGraphAttributeInferencer("then_branch");
|
|
if (graphInferencer) {
|
|
then_output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
|
|
}
|
|
|
|
graphInferencer = ctx.getGraphAttributeInferencer("else_branch");
|
|
if (graphInferencer) {
|
|
else_output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
|
|
}
|
|
|
|
auto num_outputs = ctx.getNumOutputs();
|
|
auto num_then_outputs = then_output_types.size();
|
|
auto num_else_outputs = else_output_types.size();
|
|
|
|
// the output types for then and else should be the same
|
|
if (num_then_outputs != num_else_outputs) {
|
|
fail_type_inference(
|
|
"then_branch and else_branch produce different number of outputs. ",
|
|
num_then_outputs,
|
|
" != ",
|
|
num_else_outputs);
|
|
}
|
|
|
|
if (num_then_outputs != num_outputs) {
|
|
fail_type_inference("If node has ", num_outputs, " but subgraphs produce ", num_then_outputs);
|
|
}
|
|
|
|
for (size_t i = 0, end = then_output_types.size(); i < end; ++i) {
|
|
auto then_output = then_output_types[i];
|
|
auto else_output = else_output_types[i];
|
|
|
|
auto* if_output = ctx.getOutputType(i);
|
|
*if_output = *then_output;
|
|
|
|
// merge the 'else' branch type/shape into the propagated 'then' type so the
// 'If' output represents the union of both branches
UnionTypeInfo(*else_output, *if_output);
|
|
}
|
|
}
|
|
|
|
ONNX_OPERATOR_SET_SCHEMA(
|
|
If,
|
|
13,
|
|
OpSchema()
|
|
.SetDoc("If conditional")
|
|
.Input(0, "cond", "Condition for the if. The tensor must contain a single element.", "B")
|
|
.Output(
|
|
0,
|
|
"outputs",
|
|
"Values that are live-out to the enclosing scope. The return values in "
|
|
"the `then_branch` and `else_branch` must be of the same data type. "
|
|
"The `then_branch` and `else_branch` may produce tensors with the same "
|
|
"element type and different shapes. "
|
|
"If corresponding outputs from the then-branch and the else-branch have "
|
|
"static shapes S1 and S2, then the shape of the corresponding output "
|
|
"variable of the if-node (if present) must be compatible with both S1 "
|
|
"and S2 as it represents the union of both possible shapes."
|
|
"For example, if in a model file, the first "
|
|
"output of `then_branch` is typed float tensor with shape [2] and the "
|
|
"first output of `else_branch` is another float tensor with shape [3], "
|
|
"If's first output should have (a) no shape set, or (b) "
|
|
"a shape of rank 1 with neither `dim_value` nor `dim_param` set, or (c) "
|
|
"a shape of rank 1 with a unique `dim_param`. "
|
|
"In contrast, the first output cannot have the shape [2] since [2] and "
|
|
"[3] are not compatible.",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false)
|
|
.Attr(
|
|
"then_branch",
|
|
"Graph to run if condition is true. Has N outputs: values you wish to "
|
|
"be live-out to the enclosing scope. The number of outputs must match"
|
|
" the number of outputs in the else_branch.",
|
|
AttributeProto::GRAPH)
|
|
.Attr(
|
|
"else_branch",
|
|
"Graph to run if condition is false. Has N outputs: values you wish to"
|
|
" be live-out to the enclosing scope. The number of outputs must match"
|
|
" the number of outputs in the then_branch.",
|
|
AttributeProto::GRAPH)
|
|
.TypeConstraint(
|
|
"V",
|
|
[]() {
|
|
auto t = OpSchema::all_tensor_types();
|
|
auto s = OpSchema::all_tensor_sequence_types();
|
|
t.insert(t.end(), s.begin(), s.end());
|
|
return t;
|
|
}(),
|
|
"All Tensor and Sequence types")
|
|
.TypeConstraint("B", {"tensor(bool)"}, "Only bool")
|
|
.TypeAndShapeInferenceFunction(IfInferenceFunction_13));
|
|
|
|
void LoopInferenceFunction_13(InferenceContext& ctx) {
|
|
auto num_inputs = ctx.getNumInputs();
|
|
assert(num_inputs >= 2);
|
|
auto num_loop_state_vars = num_inputs - 2; // skip 'M' and 'cond'
|
|
|
|
std::vector<const TypeProto*> subgraph_input_types;
|
|
subgraph_input_types.reserve(num_inputs);
|
|
|
|
std::vector<TypeProto> temporary_type_protos;
|
|
temporary_type_protos.reserve(num_inputs - 2);
|
|
|
|
// create TypeProto to validate iteration number type is the same as the
|
|
// optional 'M' input for max iterations.
|
|
TypeProto iter_num_type;
|
|
iter_num_type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT64);
|
|
subgraph_input_types.push_back(&iter_num_type);
|
|
|
|
// 'cond'
|
|
subgraph_input_types.push_back(ctx.getInputType(1));
|
|
|
|
// loop state value types get propagated to outputs, but shape may change
|
|
// across iterations so don't propagate it to the outputs and don't pass it
|
|
// into the subgraph inferencing
|
|
for (size_t i = 2; i < num_inputs; ++i) {
|
|
propagateElemTypeFromInputToOutput(ctx, i, i - 2);
|
|
|
|
// copy so we can remove the shape before passing to the subgraph
|
|
// inferencing
|
|
temporary_type_protos.push_back(*ctx.getInputType(i));
|
|
auto& input_type = temporary_type_protos.back();
|
|
|
|
// tensor state: clear the shape; sequence-of-tensor state: clear the element tensor's shape
if (input_type.has_tensor_type()) {
|
|
input_type.mutable_tensor_type()->clear_shape();
|
|
} else if (input_type.has_sequence_type()) {
|
|
auto& seq_type = *input_type.mutable_sequence_type();
|
|
if (seq_type.has_elem_type() && seq_type.elem_type().has_tensor_type()) {
|
|
seq_type.mutable_elem_type()->mutable_tensor_type()->clear_shape();
|
|
}
|
|
}
|
|
|
|
subgraph_input_types.push_back(&input_type);
|
|
}
|
|
|
|
// Run inferencing on the subgraph
|
|
std::vector<const TypeProto*> subgraph_output_types;
|
|
|
|
GraphInferencer* graphInferencer = ctx.getGraphAttributeInferencer("body");
|
|
if (graphInferencer) {
|
|
std::vector<const TensorProto*> input_data;
|
|
input_data.push_back(nullptr); // iteration number
|
|
for (size_t i = 1; i < num_inputs; ++i) {
|
|
input_data.push_back(ctx.getInputData(i));
|
|
}
|
|
|
|
subgraph_output_types = graphInferencer->doInferencing(subgraph_input_types, input_data);
|
|
}
|
|
|
|
// if empty(), assume inferencing was skipped
|
|
if (!subgraph_output_types.empty()) {
|
|
auto num_outputs = ctx.getNumOutputs();
|
|
|
|
// subgraph outputs the condition value first but that is only used
|
|
// internally and not returned by Loop.
|
|
if (subgraph_output_types.size() != num_outputs + 1) {
|
|
fail_type_inference(
|
|
"Graph attribute inferencing returned type information for ",
|
|
subgraph_output_types.size(),
|
|
" outputs. Expected ",
|
|
num_outputs + 1);
|
|
}
|
|
|
|
// check loop state values match. we should already have type/shape info
|
|
for (size_t i = 0; i < num_outputs; ++i) {
|
|
auto* subgraph_output_type = subgraph_output_types[i + 1]; // skip 'cond'
|
|
auto* loop_output_type = ctx.getOutputType(i);
|
|
|
|
const bool is_loop_state_var = i < num_loop_state_vars;
|
|
|
|
if (!subgraph_output_type->has_tensor_type() && !subgraph_output_type->has_sequence_type()) {
|
|
fail_type_inference(
|
|
"Loop 'body' subgraph outputs should all be tensors or sequences but output ",
|
|
i,
|
|
" was ",
|
|
subgraph_output_type->value_case());
|
|
}
|
|
|
|
if (!is_loop_state_var && !subgraph_output_type->has_tensor_type()) {
|
|
fail_type_inference(
|
|
"Loop 'body' subgraph scan outputs should all be tensors but output ",
|
|
i,
|
|
" was ",
|
|
subgraph_output_type->value_case());
|
|
}
|
|
|
|
// if there's an existing type check it matches. otherwise propagate
|
|
propagateElemTypeWithValidation(subgraph_output_type, loop_output_type);
|
|
|
|
if (is_loop_state_var) {
|
|
// shape may change across iterations so ignore.
|
|
} else {
|
|
// propagate shape
|
|
if (subgraph_output_type->tensor_type().has_shape()) {
|
|
// per iteration output. first dimension will be number of iterations
|
|
// but we don't know that value yet
|
|
TypeProto inferred_type(*subgraph_output_type);
|
|
auto* mutable_inferred_tensor_type = inferred_type.mutable_tensor_type();
|
|
auto* mutable_inferred_shape = mutable_inferred_tensor_type->mutable_shape();
|
|
|
|
mutable_inferred_shape->clear_dim();
|
|
|
|
// add empty dimension for number of iterations
|
|
mutable_inferred_shape->add_dim();
|
|
|
|
// add dimensions from subgraph output shape
|
|
for (const auto& dim : subgraph_output_type->tensor_type().shape().dim()) {
|
|
(*mutable_inferred_shape->add_dim()) = dim;
|
|
}
|
|
|
|
mergeInShapeInfo(*mutable_inferred_tensor_type, *loop_output_type->mutable_tensor_type());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static const char* Loop_ver13_doc = R"DOC(
|
|
Generic Looping construct. This loop has multiple termination conditions:
|
|
|
|
1) Trip count. Iteration count specified at runtime. Set by
|
|
specifying the input M. Optional. Set to empty string to omit.
|
|
Note that a static trip count (specified at graph construction time) can be
|
|
specified by passing in a constant node for input M.
|
|
2) Loop termination condition. This is an input to the op that determines
|
|
whether to run the first iteration and also a loop-carried dependency for
|
|
the body graph. The body graph must yield a value for the condition variable,
|
|
whether this input is provided or not.
|
|
|
|
This table summarizes the operating modes of this operator with equivalent
|
|
C-style code:
|
|
|
|
Operator inputs defined as (max_trip_count, condition_var).
|
|
|
|
input ("", ""):
|
|
for (int i=0; ; ++i) {
|
|
cond = ... // Note this value is ignored, but is required in the body
|
|
}
|
|
|
|
input ("", cond) // Note this is analogous to a while loop
|
|
bool cond = ...;
|
|
for (int i=0; cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
input ("", 1) // Note this is analogous to a do-while loop
|
|
bool cond = true
|
|
for (int i=0; cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
input (trip_count, "") // Note this is analogous to a for loop
|
|
int trip_count = ...
|
|
for (int i=0; i < trip_count; ++i) {
|
|
cond = ...; // ignored
|
|
}
|
|
|
|
input (trip_count, cond)
|
|
int trip_count = ...;
|
|
bool cond = ...;
|
|
for (int i=0; i < trip_count && cond; ++i) {
|
|
cond = ...;
|
|
}
|
|
|
|
|
|
*Sample usage - cond as well as trip count*
|
|
|
|
graph predict-net {
|
|
%a = Constant[value = <Scalar Tensor [3]>]()
|
|
%b = Constant[value = <Scalar Tensor [6]>]()
|
|
%keepgoing = Constant[value = <Scalar Tensor [1]>]()
|
|
%max_trip_count = Constant[value = <Scalar Tensor [10]>]()
|
|
%keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)
|
|
return
|
|
}
|
|
|
|
graph body-net (
|
|
%i[INT64, scalar] // iteration number
|
|
%keepgoing_in[BOOL, scalar] // incoming loop-termination-condition; not used
|
|
%b_in[INT32, scalar] // incoming value of loop-carried-dependency b
|
|
) {
|
|
%my_local = Add(%a, %b_in)
|
|
%b_out = Sub(%a, %b_in) // outgoing value of loop-carried-dependency b
|
|
%keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-termination-condition
|
|
%user_defined_val = Add(%b_in, %b_in) // scan-output value to be accumulated
|
|
return %keepgoing_out, %b_out, %user_defined_val
|
|
}
|
|
|
|
*Sample equivalent C code*
|
|
|
|
{
|
|
/* User-defined code (enclosing scope) */
|
|
int a = 3, b = 6;
|
|
bool keepgoing = true; // Analogous to input cond
|
|
/* End user-defined code */
|
|
|
|
/* Implicitly-defined code */
|
|
const int max_trip_count = 10; // Analogous to input M
|
|
int user_defined_vals[]; // Imagine this is resizable
|
|
/* End implicitly-defined code */
|
|
/* initialize loop-carried variables and scan-output variables */
|
|
bool keepgoing_out = keepgoing;
int b_out = b;
|
|
|
|
for (int i=0; i < max_trip_count && keepgoing_out; ++i) {
|
|
/* Implicitly-defined code: bind actual parameter values
|
|
to formal parameter variables of loop-body */
|
|
bool keepgoing_in = keepgoing_out;
|
|
int b_in = b_out;
|
|
|
|
/* User-defined code (loop body) */
|
|
int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine
|
|
b_out = a - b_in;
|
|
keepgoing_out = my_local > b_out;
|
|
int user_defined_val = b_in + b_in; // b_in and b_out are different variables
|
|
/* End user-defined code */
|
|
|
|
/* Implicitly defined-code */
|
|
user_defined_vals[i] = user_defined_val; // accumulate scan-output values
|
|
}
|
|
// int t = my_local; // Can't do this. my_local is not accessible here.
|
|
|
|
// The values below are bound to the output variables of the loop and therefore accessible
|
|
// b_out; user_defined_vals; keepgoing_out;
|
|
}
|
|
|
|
There are several things of note in this code snippet:
|
|
|
|
1) Values from the enclosing scope (i.e. variable "a" here) are in scope and can
|
|
be referenced in the inputs of the loop.
|
|
2) Any values computed in the loop body that need to be used in a subsequent
|
|
iteration or after the loop are modelled using a pair of variables in the loop-body,
|
|
consisting of an input variable (e.g., b_in) and an output variable (e.g., b_out).
|
|
These are referred to as loop-carried dependences. The loop operation node
|
|
supplies the input value of the input variable for the first iteration, and
|
|
returns the output value of the output variable produced by the final
|
|
iteration.
|
|
3) Scan_output variables are used to implicitly concatenate values computed across
|
|
all the iterations. In the above example, the values of user_defined_val computed
|
|
over all iterations are concatenated and returned as the value of user_defined_vals
|
|
after the loop.
|
|
4) Values created in the body cannot be accessed in the enclosing scope,
|
|
except using the mechanism described above.
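
Since this version of the operator also allows loop-carried dependencies to be sequences
of tensors (see the type constraints below), values whose shape changes from one iteration
to the next can be accumulated into a sequence state variable instead of a scan output.
A minimal sketch, with hypothetical names:

    graph body-net (%i, %cond_in, %seq_in) {
      %elt = ...                                // tensor whose shape may differ per iteration
      %seq_out = SequenceInsert(%seq_in, %elt)  // append it to the loop-carried sequence
      %cond_out = Identity(%cond_in)
      return %cond_out, %seq_out
    }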
|
|
|
|
Note that the semantics of this op support "diagonal" or "wavefront" execution.
|
|
(See Step 3 here for an example:
|
|
https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/).
|
|
Frontends should emit multi-layer RNNs as a series of Loop operators (with
|
|
time being the inner looping dimension), with each successive layer consuming
|
|
the scan_outputs from the previous layer, possibly going through several
|
|
point-wise operators (e.g. dropout, residual connections, linear layer).
|
|
|
|
The inputs and outputs of the body subgraph are matched to those of the Loop node by position rather than by name; the implementation derives the correspondence from this order.
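
For example (an illustrative sketch with hypothetical names), given a body graph

    graph body-net (%i, %cond_in, %s1_in, %s2_in) {
      ...
      return %cond_out, %s1_out, %s2_out, %scan_elt
    }

a node

    %s1_final, %s2_final, %scan_all = Loop[body = <graph body-net>](%M, %cond, %s1_init, %s2_init)

is wired up purely by position: %s1_init and %s2_init initialize %s1_in and %s2_in, the
iteration number %i and incoming condition %cond_in are supplied by the runtime on each
iteration, %s1_final and %s2_final are the values of %s1_out and %s2_out from the final
iteration, and %scan_all is the concatenation of %scan_elt across iterations. None of the
names need to match.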
|
|
)DOC";
|
|
|
|
ONNX_OPERATOR_SET_SCHEMA(
|
|
Loop,
|
|
13,
|
|
OpSchema()
|
|
.SetDoc(Loop_ver13_doc)
|
|
.Input(
|
|
0,
|
|
"M",
|
|
"A maximum trip-count for the loop specified at runtime. Optional."
|
|
" Pass empty string to skip.",
|
|
"I",
|
|
OpSchema::Optional)
|
|
.Input(
|
|
1,
|
|
"cond",
|
|
"A boolean termination condition. Optional. Pass empty string to skip.",
|
|
"B",
|
|
OpSchema::Optional)
|
|
.Input(
|
|
2,
|
|
"v_initial",
|
|
"The initial values of any loop-carried dependencies (values that "
|
|
"change across loop iterations)",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false,
|
|
0)
|
|
.Output(
|
|
0,
|
|
"v_final_and_scan_outputs",
|
|
"Final N loop carried dependency values then K scan_outputs. "
|
|
"Scan outputs must be Tensors.",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false)
|
|
.Attr(
|
|
"body",
|
|
"The graph run each iteration. It has 2+N inputs: (iteration_num, "
|
|
"condition, loop carried dependencies...). It has 1+N+K outputs: "
|
|
"(condition, loop carried dependencies..., scan_outputs...). Each "
|
|
"scan_output is created by concatenating the value of the specified "
|
|
"output value at the end of each iteration of the loop. It is an error"
|
|
" if the dimensions or data type of these scan_outputs change across loop"
|
|
" iterations.",
|
|
AttributeProto::GRAPH)
|
|
.TypeConstraint(
|
|
"V",
|
|
[]() {
|
|
auto t = OpSchema::all_tensor_types();
|
|
auto s = OpSchema::all_tensor_sequence_types();
|
|
t.insert(t.end(), s.begin(), s.end());
|
|
return t;
|
|
}(),
|
|
"All Tensor and Sequence types")
|
|
.TypeConstraint("I", {"tensor(int64)"}, "tensor of int64, which should be a scalar.")
|
|
.TypeConstraint("B", {"tensor(bool)"}, "tensor of bool, which should be a scalar.")
|
|
.TypeAndShapeInferenceFunction(LoopInferenceFunction_13));
|
|
|
|
static const char* scan_11_doc = R"DOC(
|
|
Scan can be used to iterate over one or more scan_input tensors,
|
|
constructing zero or more scan_output tensors. It combines ideas from general recurrences,
|
|
functional programming constructs such as scan, fold, map, and zip, and is intended to enable
|
|
generalizations of RNN-like constructs for sequence-to-sequence processing.
|
|
Other tensors (referred to as state_variables here) can be used to carry a state
|
|
when iterating from one element to another (similar to hidden-state in RNNs, also referred
|
|
to as loop-carried dependences in the context of loops).
|
|
Many common usages involve a single scan_input tensor (where functionality
|
|
similar to scan, fold and map can be obtained). When more than one scan_input is used,
|
|
a behavior similar to zip is obtained.
|
|
|
|
The attribute body must be a graph, specifying the computation to be performed in
|
|
every iteration. It takes as input the current values of the state_variables and
|
|
the current iterated element of the scan_inputs. It must return the (updated) values
|
|
of the state_variables and zero or more scan_output_element tensors. The values of the
|
|
scan_output_element tensors are concatenated over all the iterations to produce the
|
|
scan_output values of the scan construct (similar to the concatenated intermediate
|
|
hidden-state values of RNN-like constructs). All the output tensors (state_variables as
|
|
well as scan_output_element tensors) are required to have the same shape in each iteration
|
|
of the loop (a restriction imposed to enable efficient memory allocation).
|
|
|
|
Note that the iterated element passed to the body subgraph does not have a sequence
|
|
axis. It will have a rank one less than the rank of the corresponding scan_input.
|
|
|
|
The scan operation returns the final values of the state_variables as well as the
|
|
scan_outputs.
|
|
|
|
The optional attribute scan_input_directions specifies the direction (forward or backward)
|
|
for each scan input. If this attribute is omitted, all sequences are scanned in the forward
|
|
direction. A bidirectional scan may be performed by specifying the same tensor input twice
|
|
in the scan_inputs, once with a forward direction, and once with a backward direction.
|
|
|
|
The scan_output of the operation is produced by concatenating the scan_output_element
|
|
values produced by the body in each iteration. The optional attribute scan_output_directions
|
|
specifies the direction in which scan_output is constructed (by appending or prepending the
|
|
scan_output_element to scan_output in each iteration) for each scan_output. If this attribute
|
|
is omitted, the scan_output_element is appended to the scan_output in each iteration.
|
|
|
|
The optional attribute scan_input_axes specifies the axis to be scanned for each scan_input.
|
|
If omitted, every scan_input will be scanned in axis 0. For example, if axis 0 is the
|
|
batch axis and axis 1 is the time axis (to be scanned), specify an axis value of 1.
|
|
Note that scanning a non-zero axis may be less efficient than scanning axis zero.
|
|
|
|
The optional attribute scan_output_axes specifies the axis along which the scan_outputs
|
|
are accumulated for each scan_output. For example, if axis 1 is the time axis (to be
|
|
scanned) for both inputs and outputs, specify a scan_input axis and scan_output axis
|
|
value of 1.
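
For example (an illustrative sketch only; the names are hypothetical), scanning an input
%X of shape [batch, time, feature] along its time axis and accumulating the per-iteration
outputs along the same axis can be expressed as:

    %H_final, %Y = Scan[body = <graph cell>, num_scan_inputs = 1,
                        scan_input_axes = [1], scan_output_axes = [1]](%H_0, %X)

Each iteration of cell then receives a [batch, feature] slice of %X, and %Y is assembled
with its time dimension at axis 1.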
|
|
|
|
Note that because of the ONNX restriction that only the last parameter of an operator can
|
|
be variadic, the initial-states and scan-inputs are listed together as one input parameter.
|
|
Similarly, the final-states and scan-outputs are listed together as one output parameter.
|
|
The attribute num_scan_inputs indicates the number M of scan-inputs.
|
|
|
|
The behavior of
|
|
|
|
Scan <
|
|
num_scan_inputs = m,
|
|
body = loop-body,
|
|
scan_input_axes = [axis_1, ..., axis_m]
|
|
> (init_1, ..., init_n, scan_1, ..., scan_m)
|
|
|
|
is equivalent to the following pseudo-code:
|
|
|
|
// scan_i.shape[axis_i] denotes the (max) sequence-length of scan_i
|
|
// scan_i.shape[axis_i] is required to be equal to scan_j.shape[axis_j] for all i,j.
|
|
sequence_length = scan_1.shape[axis_1];
|
|
|
|
// initialize state-variables
|
|
st_1 = init_1; ... st_n = init_n;
|
|
// initialize scan-output variables: [] denotes an empty tensor
|
|
scan_out_1 = []; ...; scan_out_k = [];
|
|
// identify number of iterations:
|
|
|
|
// execute loop
|
|
for (int t = 0; t < sequence_length; ++t) {
|
|
// generate the scan-input elements: the notation T<axis=k>[t] indicates the sub-tensor
|
|
// of rank one less than T obtained by indexing T at position t along axis k.
|
|
si_1 = scan_1<axis=axis_1>[t];
|
|
... ;
|
|
si_m = scan_m<axis=axis_m>[t];
|
|
// execute loop-body
|
|
st_1, ..., st_n, so_1, ..., so_k = loop-body(st_1, ..., st_n, si_1, ..., si_m)
|
|
// accumulate the scan-output elements
|
|
scan_out_1 = Concat<axis=0>(scan_out_1, so_1); ... ; scan_out_k = Concat<axis=0>(scan_out_k, so_k);
|
|
}
|
|
|
|
return st_1, ..., st_n, scan_out_1, ..., scan_out_k;
|
|
|
|
*Sample usage: Encoding RNN using a Scan*
|
|
|
|
The following example shows how a simple RNN over an input tensor %X, with weight tensor %Wi,
|
|
recurrence weight tensor %Ri, bias tensors %Wbi and %Rbi, and initial hidden-state %H_0 can
|
|
be encoded as a Scan operator. Note that the loop-body is a nested graph, and it directly computes
|
|
%Wi, %Ri, %Wbi, and %Rbi (typically constants or initializers in the body graph). If these
|
|
values are computed in the outer graph, they need to be passed in as extra state_variables.
|
|
|
|
graph rnn-encoding {
|
|
%H_0 = ...
|
|
%X = ...
|
|
%Y_h, %Y = Scan[body = <graph rnn-cell-1>, num_scan_inputs=1](%H_0, %X)
|
|
return %Y, %Y_h
|
|
}
|
|
|
|
graph rnn-cell-1 (
|
|
%H_tminus1[FLOAT, tensor]
|
|
%X_t[FLOAT, tensor]
|
|
) {
|
|
%Wi = ...
|
|
%Ri = ...
|
|
%Wbi = ...
|
|
%Rbi = ...
|
|
%t1 = X_t * (Wi^T)
|
|
%t2 = H_tminus1*(Ri^T)
|
|
%t3 = Add(%t1, %t2)
|
|
%t4 = Add(%t3, %Wbi)
|
|
%t5 = Add(%t4, %Rbi)
|
|
%Ht = Tanh(%t5)
|
|
%Accumulate = Identity(%Ht)
|
|
return %Ht, %Accumulate
|
|
}
|
|
|
|
)DOC";
|
|
|
|
extern void ScanInferenceFunction(InferenceContext& ctx);
|
|
|
|
ONNX_OPERATOR_SET_SCHEMA(
|
|
Scan,
|
|
11,
|
|
OpSchema()
|
|
.SetDoc(scan_11_doc)
|
|
.Input(
|
|
0,
|
|
"initial_state_and_scan_inputs",
|
|
"Initial values of the loop's N state variables followed by M scan_inputs",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false)
|
|
.Output(
|
|
0,
|
|
"final_state_and_scan_outputs",
|
|
"Final values of the loop's N state variables followed by K scan_outputs",
|
|
"V",
|
|
OpSchema::Variadic,
|
|
false)
|
|
.Attr(
|
|
"body",
|
|
"The graph run each iteration. It has N+M inputs: "
|
|
"(loop state variables..., scan_input_elts...). It has N+K outputs: "
|
|
"(loop state variables..., scan_output_elts...). Each "
|
|
"scan_output is created by concatenating the value of the specified "
|
|
"scan_output_elt value at the end of each iteration of the loop. It is an error"
|
|
" if the dimensions of these values change across loop iterations.",
|
|
AttributeProto::GRAPH,
|
|
true)
|
|
.Attr("num_scan_inputs", "An attribute specifying the number of scan_inputs M. ", AttributeProto::INT, true)
|
|
.Attr(
|
|
"scan_input_directions",
|
|
"An optional list of M flags. The i-th element of the list specifies the direction "
|
|
"to be scanned for the i-th scan_input tensor: 0 indicates forward direction and 1 "
|
|
"indicates reverse direction. "
|
|
"If omitted, all scan_input tensors will be scanned in the forward direction.",
|
|
AttributeProto::INTS,
|
|
false)
|
|
.Attr(
|
|
"scan_output_directions",
|
|
"An optional list of K flags, one for each scan_output. The i-th element of the list "
|
|
"specifies whether the i-th scan_output should be constructed by appending or "
|
|
"prepending a new value in each iteration: 0 indicates appending and 1 "
|
|
"indicates prepending. "
|
|
"If omitted, all scan_output tensors will be produced by appending a value "
|
|
"in each iteration.",
|
|
AttributeProto::INTS,
|
|
false)
|
|
.Attr(
|
|
"scan_input_axes",
|
|
"An optional list of M flags. The i-th element of the list specifies the axis "
|
|
"to be scanned (the sequence axis) for the i-th scan_input. If omitted, 0 will "
|
|
"be used as the scan axis for every scan_input. Negative value for an axis means "
|
|
"counting dimensions from the back. Accepted range is [-r, r-1] where r = rank(input).",
|
|
AttributeProto::INTS,
|
|
false)
|
|
.Attr(
|
|
"scan_output_axes",
|
|
"An optional list of K flags. The i-th element of the list specifies the axis "
|
|
"for the i-th scan_output. The scan outputs are accumulated along the specified "
|
|
"axis. If omitted, 0 will be used as the scan axis for every scan_output. "
|
|
"Negative value for an axis means counting dimensions from the back. Accepted "
|
|
"range is [-r, r-1].",
|
|
AttributeProto::INTS,
|
|
false)
|
|
.TypeConstraint("V", OpSchema::all_tensor_types(), "All Tensor types")
|
|
.TypeAndShapeInferenceFunction(ScanInferenceFunction));
|
|
|
|
} // namespace ONNX_NAMESPACE
|