I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,4 @@
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from onnx.reference.ops.aionnxml._op_list import load_op

View File

@ -0,0 +1,81 @@
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
def compute_logistic(val: float) -> float:
    """Evaluate the logistic sigmoid ``1 / (1 + exp(-val))`` for one scalar.

    The exponential is only ever taken of a non-positive number; negative
    inputs are handled through ``sigmoid(-a) == 1 - sigmoid(a)``.
    """
    magnitude = np.abs(val)
    upper_half = 1.0 / (1.0 + np.exp(-magnitude))
    if val < 0:
        return 1.0 - upper_half  # type: ignore
    return upper_half  # type: ignore


# Element-wise version of ``compute_logistic`` usable on arrays.
logistic = np.vectorize(compute_logistic)
def compute_softmax_zero(values: np.ndarray) -> np.ndarray:
    """Apply the SOFTMAX_ZERO transform to a one-dimensional array in place.

    Entries whose magnitude exceeds 1e-7 are replaced by ``exp(v - max)``;
    the remaining ones are only multiplied by ``exp(-max)``.  The array is
    then divided by the sum of the new entries, or filled with 0.5 when that
    sum is zero.  The very same array object is returned.
    """
    peak = values.max()
    near_zero_scale = np.exp(-peak)
    total = 0
    for position, current in enumerate(values):
        if current > 0.0000001 or current < -0.0000001:
            values[position] = np.exp(current - peak)
        else:
            values[position] *= near_zero_scale
        # Accumulate the value as stored, i.e. after the cast to the array dtype.
        total += values[position]
    if total == 0:
        values[:] = 0.5
    else:
        values[:] /= total
    return values
def softmax_zero(values: np.ndarray) -> np.ndarray:
    """Apply ``compute_softmax_zero`` in place to a vector or to every row.

    A one-dimensional input is transformed as a whole; otherwise the
    transform is applied to each item obtained by iterating over the first
    axis.  The input array itself is returned.
    """
    targets = [values] if len(values.shape) == 1 else values
    for target in targets:
        compute_softmax_zero(target)
    return values
def softmax(values: np.ndarray) -> np.ndarray:
    """Compute a softmax in place and return the modified array.

    Two-dimensional inputs are normalised row by row; any other input is
    normalised over all of its entries at once.  The maximum is subtracted
    before exponentiating.
    """
    if len(values.shape) != 2:
        shifted = values - values.max()
        values[:] = np.exp(shifted)
        values /= values.sum()
        return values
    values -= values.max(axis=1, keepdims=1)  # type: ignore
    np.exp(values, out=values)
    values /= values.sum(axis=1, keepdims=1)  # type: ignore
    return values
def erf_inv(x: float) -> float:
    """Approximate the inverse error function of a scalar.

    The closed-form approximation uses the constant 0.147.  When
    ``(1 - x) * (1 + x)`` is exactly zero (``x`` equal to 1 or -1) the
    function returns 0 without evaluating the logarithm.
    """
    if x < 0:
        sign = -1.0
    else:
        sign = 1.0
    complement = (1.0 - x) * (1 + x)
    if complement == 0:
        return 0
    log_term = np.log(complement)
    half_sum = 2.0 / (np.pi * 0.147) + 0.5 * log_term
    scaled_log = 1.0 / 0.147 * log_term
    inner = -half_sum + np.sqrt(half_sum * half_sum - scaled_log)
    return sign * np.sqrt(inner)
def compute_probit(val: float) -> float:
    """Return ``1.41421356 * erf_inv(2 * val - 1)`` for one scalar."""
    centred = val * 2 - 1
    return 1.41421356 * erf_inv(centred)


# Element-wise version of ``compute_probit`` usable on arrays.
probit = np.vectorize(compute_probit)
def expit(x: np.ndarray) -> np.ndarray:
    """Return the element-wise logistic sigmoid of *x*, cast back to the dtype of *x*."""
    denominator = 1.0 + np.exp(-x)
    sigmoid = 1.0 / denominator
    return sigmoid.astype(x.dtype)

View File

@ -0,0 +1,104 @@
# SPDX-License-Identifier: Apache-2.0
# Operator ZipMap is not implemented. Its use should
# be discouraged. It is just a different way to output
# probabilities not consumed by any operator.
from __future__ import annotations
import textwrap
from typing import Any, Dict
from typing import Optional as TOptional
from typing import Union
from onnx.reference.op_run import OpFunction
from onnx.reference.ops._helpers import build_registered_operators_any_domain
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
from onnx.reference.ops.aionnxml.op_array_feature_extractor import ArrayFeatureExtractor
from onnx.reference.ops.aionnxml.op_binarizer import Binarizer
from onnx.reference.ops.aionnxml.op_dict_vectorizer import DictVectorizer
from onnx.reference.ops.aionnxml.op_feature_vectorizer import FeatureVectorizer
from onnx.reference.ops.aionnxml.op_imputer import Imputer
from onnx.reference.ops.aionnxml.op_label_encoder import LabelEncoder
from onnx.reference.ops.aionnxml.op_linear_classifier import LinearClassifier
from onnx.reference.ops.aionnxml.op_linear_regressor import LinearRegressor
from onnx.reference.ops.aionnxml.op_normalizer import Normalizer
from onnx.reference.ops.aionnxml.op_one_hot_encoder import OneHotEncoder
from onnx.reference.ops.aionnxml.op_scaler import Scaler
from onnx.reference.ops.aionnxml.op_svm_classifier import SVMClassifier
from onnx.reference.ops.aionnxml.op_svm_regressor import SVMRegressor
from onnx.reference.ops.aionnxml.op_tree_ensemble import TreeEnsemble
from onnx.reference.ops.aionnxml.op_tree_ensemble_classifier import (
TreeEnsembleClassifier,
)
from onnx.reference.ops.aionnxml.op_tree_ensemble_regressor import TreeEnsembleRegressor
def _build_registered_operators() -> dict[str, dict[int | None, OpRunAiOnnxMl]]:
    """Build the operator registry from the names visible in this module.

    A shallow copy of the module globals is handed to
    ``build_registered_operators_any_domain`` and its result is returned
    unchanged.
    """
    module_namespace = globals().copy()
    return build_registered_operators_any_domain(module_namespace)  # type: ignore[return-value]
def load_op(
    domain: str,
    op_type: str,
    version: None | int,
    custom: Any = None,
    evaluator_cls: type | None = None,  # noqa: ARG001
) -> Any:
    """Loads the implementation of a specified operator.

    Args:
        domain: domain, must be ``"ai.onnx.ml"`` unless *custom* is given
        op_type: operator type
        version: requested version, ``None`` selects the default
            implementation
        custom: custom implementation (like a function); when it is not
            ``None`` it takes precedence over the registry
        evaluator_cls: unused

    Returns:
        class implementing the operator, or, when *custom* is given, a
        callable wrapping it into an ``OpFunction``

    Raises:
        ValueError: the domain is not ``"ai.onnx.ml"`` or the selected
            registry entry is ``None``
        NotImplementedError: no implementation is registered for *op_type*
        RuntimeError: the operator has no default implementation, or no
            registered version is lower than or equal to *version*
    """
    global _registered_operators  # noqa: PLW0603
    if _registered_operators is None:
        # The registry is built lazily, on the first call only.
        _registered_operators = _build_registered_operators()  # type: ignore[assignment]
    if custom is not None:
        return lambda *args: OpFunction(*args, impl=custom)  # type: ignore
    if domain != "ai.onnx.ml":
        # The message used to advertise '' (copied from the main domain).
        raise ValueError(f"Domain must be 'ai.onnx.ml' not {domain!r}.")
    if op_type not in _registered_operators:  # type: ignore
        available = "\n".join(textwrap.wrap(", ".join(sorted(_registered_operators))))  # type: ignore
        raise NotImplementedError(
            f"No registered implementation for operator {op_type!r} "
            f"and domain {domain!r} in\n{available}"
        )
    impl = _registered_operators[op_type]  # type: ignore
    if None not in impl:
        raise RuntimeError(
            f"No default implementation for operator {op_type!r} "
            f"and domain {domain!r}, found "
            f"{', '.join(map(str, impl))}."
        )
    if version is None or len(impl) == 1:
        cl = impl[None]
    else:
        # Pick the highest registered version not exceeding the request.
        best = -1
        for v in impl:
            if v is None:
                continue
            if best < v <= version:
                best = v
        if best == -1:
            raise RuntimeError(
                f"No implementation for operator {op_type!r} "
                f"domain {domain!r} and version {version!r}, found "
                f"{', '.join(map(str, impl))}."
            )
        cl = impl[best]
    if cl is None:
        available = "\n".join(textwrap.wrap(", ".join(sorted(_registered_operators))))  # type: ignore
        raise ValueError(
            f"Not registered implementation for operator {op_type!r}, "
            f"domain {domain!r}, and {version!r} in\n{available}"
        )
    return cl
# Cache of the registered operator implementations, indexed by operator type
# and then by version. It stays ``None`` until ``load_op`` fills it on its
# first call.
_registered_operators: dict[str, dict[int | None, OpRunAiOnnxMl]] | None = None

View File

@ -0,0 +1,8 @@
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from onnx.reference.op_run import OpRun
class OpRunAiOnnxMl(OpRun):
    """Specialisation of ``OpRun`` whose ``op_domain`` is ``"ai.onnx.ml"``."""

    op_domain = "ai.onnx.ml"

View File

@ -0,0 +1,48 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
def _array_feature_extrator(data, indices):  # type: ignore
    """Implementation of operator *ArrayFeatureExtractor*.

    The positions in *indices* are flattened and used to select entries
    along the last axis of *data*.  A one-dimensional *data* yields an output
    of shape ``(1, n)``; otherwise the leading dimensions of *data* are kept
    and the last one becomes ``n``, the number of indices.

    Raises:
        RuntimeError: one of the indices is out of bounds.
    """
    # Whatever the rank of ``indices``, its flattened content is what counts.
    index = indices.ravel().tolist()
    add = len(index)
    if len(data.shape) == 1:
        new_shape = (1, add)
    else:
        new_shape = [*data.shape[:-1], add]
    try:
        selected = data[..., index]
    except IndexError as e:
        raise RuntimeError(f"data.shape={data.shape}, indices={indices}") from e
    return selected.reshape(new_shape)
class ArrayFeatureExtractor(OpRunAiOnnxMl):
    def _run(self, data, indices):  # type: ignore
        """Runtime for operator *ArrayFeatureExtractor*.

        Warning:
            ONNX specifications may be imprecise in some cases.
            When the input data is a vector (one dimension),
            the output still has two dimensions, like a matrix with one row.
            The implementation follows what onnxruntime does in
            `array_feature_extractor.cc
            <https://github.com/microsoft/onnxruntime/blob/main/
            onnxruntime/core/providers/cpu/ml/array_feature_extractor.cc#L84>`_.
        """
        return (_array_feature_extrator(data, indices),)

View File

@ -0,0 +1,15 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
def compute_binarizer(x, threshold=None):
    """Return a one-element tuple holding 1 where ``x > threshold`` and 0 elsewhere.

    The result has the same dtype as *x*.
    """
    above = x > threshold
    return (above.astype(x.dtype),)
class Binarizer(OpRunAiOnnxMl):
    """Runtime for operator *Binarizer*, a thin wrapper around ``compute_binarizer``."""

    def _run(self, x, threshold=None):  # type: ignore
        outputs = compute_binarizer(x, threshold)
        return outputs

View File

@ -0,0 +1,56 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class DictVectorizer(OpRunAiOnnxMl):
    """Runtime for operator *DictVectorizer*."""

    def _run(self, x, int64_vocabulary=None, string_vocabulary=None):  # type: ignore
        """Turn dictionaries into dense vectors ordered by a vocabulary.

        Args:
            x: either a list / array of dictionaries (one output row each) or
                a single dictionary (one output vector)
            int64_vocabulary: vocabulary of integer keys, used when not empty
            string_vocabulary: vocabulary of string keys, used otherwise

        Returns:
            a one-element tuple holding the dense array

        Raises:
            RuntimeError: *x* is a sequence and both vocabularies are empty
                or missing
            TypeError: *x* is neither a sequence nor a dictionary
        """
        if isinstance(x, (np.ndarray, list)):
            # A missing vocabulary is None: treat it as empty so the explicit
            # error below is raised rather than an accidental TypeError.
            vocabulary = int64_vocabulary or string_vocabulary or ()
            dict_labels = {v: i for i, v in enumerate(vocabulary)}
            if not dict_labels:
                raise RuntimeError(
                    "int64_vocabulary and string_vocabulary cannot be both empty."
                )

            values_list = []
            rows_list = []
            cols_list = []
            for i, row in enumerate(x):
                for k, v in row.items():
                    values_list.append(v)
                    rows_list.append(i)
                    cols_list.append(dict_labels[k])
            # The output dtype is the one numpy infers from the collected values.
            values = np.array(values_list)
            res = np.zeros((len(x), len(dict_labels)), dtype=values.dtype)  # type: ignore
            for r, c, v in zip(rows_list, cols_list, values):
                res[r, c] = v
            return (res,)

        if isinstance(x, dict):
            # Keys absent from the dictionary map to 0.
            keys = int64_vocabulary or string_vocabulary or ()
            return (np.array([x.get(k, 0) for k in keys]),)

        raise TypeError(f"x must be iterable not {type(x)}.")

View File

@ -0,0 +1,30 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class FeatureVectorizer(OpRunAiOnnxMl):
    """Runtime for operator *FeatureVectorizer*: concatenates inputs column-wise."""

    def _preprocess(self, a, cut):  # type: ignore
        """Return *a* as a matrix with exactly *cut* columns.

        A vector becomes a single column.  Extra columns are dropped and
        missing ones are filled with zeros.
        """
        if len(a.shape) == 1:
            a = a.reshape((-1, 1))
        if len(a.shape) != 2:
            raise ValueError(f"Every input must have 1 or 2 dimensions not {a.shape}.")
        n_cols = a.shape[1]
        if cut > n_cols:
            padded = np.zeros((a.shape[0], cut), dtype=a.dtype)
            padded[:, :n_cols] = a
            return padded
        if cut < n_cols:
            return a[:, :cut]
        return a

    def _run(self, *args, inputdimensions=None):  # type: ignore
        """Resize every input to its requested width and join them along axis 1."""
        blocks = []
        for a, axis in zip(args, inputdimensions):
            blocks.append(self._preprocess(a, axis))
        return (np.concatenate(blocks, axis=1),)

View File

@ -0,0 +1,47 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class Imputer(OpRunAiOnnxMl):
    """Runtime for operator *Imputer*: replaces a marker value column by column."""

    def _run(  # type: ignore
        self,
        x,
        imputed_value_floats=None,
        imputed_value_int64s=None,
        replaced_value_float=None,
        replaced_value_int64=None,
    ):
        """Return a copy of the matrix *x* where the marker value is replaced.

        The float attributes are used when ``imputed_value_floats`` is not
        empty, the integer ones otherwise.  Either one replacement value is
        given per column or a single one is shared by all columns.

        Raises:
            ValueError: no imputed value is defined.
            TypeError: *x* is not a matrix or the number of imputed values
                does not match the number of columns.
        """
        if imputed_value_floats is not None and len(imputed_value_floats) > 0:
            values, replace = imputed_value_floats, replaced_value_float
        elif imputed_value_int64s is not None and len(imputed_value_int64s) > 0:
            values, replace = imputed_value_int64s, replaced_value_int64
        else:
            raise ValueError("Missing are not defined.")

        if isinstance(values, list):
            values = np.array(values)
        if len(x.shape) != 2:
            raise TypeError(f"x must be a matrix but shape is {x.shape}")
        if values.shape[0] not in (x.shape[1], 1):
            raise TypeError(  # pragma: no cover
                f"Dimension mismatch {values.shape[0]} != {x.shape[1]}"
            )

        # NaN never compares equal to itself, hence the dedicated detection.
        if np.isnan(replace):
            is_marker = np.isnan
        else:

            def is_marker(column):
                return column == replace

        x = x.copy()
        last = values.shape[0] - 1
        for i in range(x.shape[1]):
            val = values[min(i, last)]
            x[is_marker(x[:, i]), i] = val
        return (x,)

View File

@ -0,0 +1,50 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class LabelEncoder(OpRunAiOnnxMl):
    """Runtime for operator *LabelEncoder*: maps every element through a lookup table."""

    def _run(  # type: ignore
        self,
        x,
        default_float=None,
        default_int64=None,
        default_string=None,
        default_tensor=None,
        keys_floats=None,
        keys_int64s=None,
        keys_strings=None,
        values_floats=None,
        values_int64s=None,
        values_strings=None,
        keys_tensor=None,
        values_tensor=None,
    ):
        """Replace every element of *x* by the value associated with it.

        The first non-empty ``keys_*`` and ``values_*`` attributes define the
        lookup table.  Elements missing from the table receive the default
        matching the kind of values in use.
        """
        keys = keys_floats or keys_int64s or keys_strings or keys_tensor
        values = values_floats or values_int64s or values_strings or values_tensor
        mapping = dict(zip(keys, values))

        # The identity of the selected container tells which default value
        # and which output type apply.
        if values is values_tensor:
            fallback = default_tensor.item()
            out_type = default_tensor.dtype
        elif values is values_floats:
            fallback = default_float
            out_type = np.float32
        elif values is values_int64s:
            fallback = default_int64
            out_type = np.int64
        elif values is values_strings:
            fallback = default_string
            out_type = np.str_
            if not isinstance(fallback, str):
                fallback = ""

        def translate(item):
            return mapping.get(item, fallback)

        encoded = np.vectorize(translate, otypes=[out_type])(x)
        if encoded.dtype == object:
            encoded = encoded.astype(np.str_)
        return (encoded,)

View File

@ -0,0 +1,98 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._common_classifier import (
compute_probit,
compute_softmax_zero,
expit,
)
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class LinearClassifier(OpRunAiOnnxMl):
    """Runtime for operator *LinearClassifier*."""

    @staticmethod
    def _post_process_predicted_label(label, scores, classlabels_ints_string):  # type: ignore
        """Replaces int64 predicted labels by the corresponding
        strings.
        """
        if classlabels_ints_string is not None:
            label = np.array([classlabels_ints_string[i] for i in label])
        return label, scores

    def _run(  # type: ignore
        self,
        x,
        classlabels_ints=None,
        classlabels_strings=None,
        coefficients=None,
        intercepts=None,
        multi_class=None,  # noqa: ARG002
        post_transform=None,
    ):
        """Compute the linear scores of *x* and the predicted labels.

        Args:
            x: matrix with one observation per row; it is cast to float32
                unless it is float64
            classlabels_ints: integer class labels
            classlabels_strings: string class labels
            coefficients: flat list of coefficients, one block per class
            intercepts: optional intercepts added to the scores
            multi_class: unused
            post_transform: one of NONE, LOGISTIC, SOFTMAX, SOFTMAX_ZERO,
                PROBIT

        Returns:
            tuple ``(labels, scores)``

        Raises:
            NotImplementedError: *post_transform* is not one of the values
                listed above.
        """
        dtype = x.dtype
        if dtype != np.float64:
            x = x.astype(np.float32)
        coefficients = np.array(coefficients).astype(x.dtype)
        coefficients = coefficients.reshape((-1, x.shape[1])).T
        scores = np.dot(x, coefficients)
        # Convert the intercepts only when they exist: converting ``None``
        # fails, which used to defeat this check.
        if intercepts is not None:
            scores += np.array(intercepts).astype(x.dtype)

        n_classes = max(len(classlabels_ints or []), len(classlabels_strings or []))
        if coefficients.shape[1] == 1 and n_classes == 2:
            # Binary model stored with a single score: expose both classes.
            new_scores = np.empty((scores.shape[0], 2), dtype=np.float32)
            new_scores[:, 0] = -scores[:, 0]
            new_scores[:, 1] = scores[:, 0]
            scores = new_scores

        if post_transform == "NONE":
            pass
        elif post_transform == "LOGISTIC":
            scores = expit(scores)
        elif post_transform == "SOFTMAX":
            np.subtract(
                scores,
                scores.max(axis=1, keepdims=1),
                out=scores,
            )
            scores = np.exp(scores)
            scores = np.divide(scores, scores.sum(axis=1, keepdims=1))
        elif post_transform == "SOFTMAX_ZERO":
            for i in range(scores.shape[0]):
                scores[i, :] = compute_softmax_zero(scores[i, :])
        elif post_transform == "PROBIT":
            for i in range(scores.shape[0]):
                for j in range(scores.shape[1]):
                    scores[i, j] = compute_probit(scores[i, j])
        else:
            # The f prefix was missing: the placeholder was printed verbatim.
            raise NotImplementedError(f"Unknown post_transform: '{post_transform}'.")

        if scores.shape[1] > 1:
            labels = np.argmax(scores, axis=1)
            if classlabels_ints is not None:
                labels = np.array([classlabels_ints[i] for i in labels], dtype=np.int64)
            elif classlabels_strings is not None:
                labels = np.array([classlabels_strings[i] for i in labels])
        else:
            threshold = 0 if post_transform == "NONE" else 0.5
            if classlabels_ints is not None:
                labels = (
                    np.where(scores >= threshold, classlabels_ints[0], 0)
                    .astype(np.int64)
                    .ravel()
                )
            elif classlabels_strings is not None:
                # String labels stay strings; casting them to int64 always failed.
                labels = np.where(scores >= threshold, classlabels_strings[0], "").ravel()
            else:
                labels = (scores >= threshold).astype(np.int64).ravel()
        return (labels, scores)

View File

@ -0,0 +1,26 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class LinearRegressor(OpRunAiOnnxMl):
    """Runtime for operator *LinearRegressor* (only ``post_transform="NONE"``)."""

    def _run(  # type: ignore
        self, x, coefficients=None, intercepts=None, targets=1, post_transform=None
    ):
        """Compute ``x @ coefficients + intercepts``.

        Args:
            x: matrix with one observation per row
            coefficients: flat list of coefficients, one block of equal
                length per target
            intercepts: optional intercepts added to the scores
            targets: number of targets
            post_transform: only ``"NONE"`` (or ``None``) is supported

        Returns:
            a one-element tuple holding the scores

        Raises:
            NotImplementedError: another *post_transform* is requested.
        """
        coefficients = np.array(coefficients).astype(x.dtype)
        n = coefficients.shape[0] // targets
        coefficients = coefficients.reshape(targets, n).T
        score = np.dot(x, coefficients)
        # Convert the intercepts only when they exist: converting ``None``
        # fails, which used to defeat this check.
        if intercepts is not None:
            score += np.array(intercepts).astype(x.dtype)
        # ``None`` is accepted as "NONE", as SVMRegressor does.
        if post_transform in (None, "NONE"):
            return (score,)
        raise NotImplementedError(
            f"post_transform: {post_transform!r} is not implemented."
        )

View File

@ -0,0 +1,41 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class Normalizer(OpRunAiOnnxMl):
    """Runtime for operator *Normalizer*: rescales every row of a matrix."""

    @staticmethod
    def norm_max(x):  # type: ignore
        """Max normalization"""
        row_peak = np.abs(x).max(axis=1)
        div = row_peak.reshape((x.shape[0], -1))
        # The divisor is floored at 1e-30.
        return x / np.maximum(div, 1e-30)

    @staticmethod
    def norm_l1(x):  # type: ignore
        """L1 normalization"""
        row_total = np.abs(x).sum(axis=1)
        div = row_total.reshape((x.shape[0], -1))
        return x / np.maximum(div, 1e-30)

    @staticmethod
    def norm_l2(x):  # type: ignore
        """L2 normalization"""
        squared_sum = np.square(x).sum(axis=1)
        np.sqrt(squared_sum, out=squared_sum)
        div = squared_sum.reshape((x.shape[0], -1))
        return x / np.maximum(div, 1e-30)

    def _run(self, x, norm=None):  # type: ignore
        """Normalize *x* with the norm named by *norm* (``MAX``, ``L1`` or ``L2``)."""
        candidates = (
            ("MAX", Normalizer.norm_max),
            ("L1", Normalizer.norm_l1),
            ("L2", Normalizer.norm_l2),
        )
        for label, normalize in candidates:
            if norm == label:
                return (normalize(x),)
        raise ValueError(f"Unexpected value for norm='{norm}'.")

View File

@ -0,0 +1,53 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class OneHotEncoder(OpRunAiOnnxMl):
    """Runtime for operator *OneHotEncoder*."""

    def _run(self, x, cats_int64s=None, cats_strings=None, zeros=None):  # type: ignore
        """One-hot encode a vector or a matrix of categories.

        Args:
            x: array with one or two dimensions
            cats_int64s: integer categories, used when not empty
            cats_strings: string categories, used otherwise
            zeros: when falsy, an element matching no category is an error

        Returns:
            a one-element tuple holding a float32 array whose shape is the
            shape of *x* followed by the number of categories

        Raises:
            RuntimeError: no category is defined, *x* has more than two
                dimensions, or an element has no category while *zeros* is
                falsy.
        """
        if cats_int64s is not None and len(cats_int64s) > 0:
            classes = {v: i for i, v in enumerate(cats_int64s)}
        elif cats_strings is not None and len(cats_strings) > 0:
            classes = {v: i for i, v in enumerate(cats_strings)}
        else:
            raise RuntimeError("No encoding was defined.")

        shape = x.shape
        new_shape = (*shape, len(classes))
        res = np.zeros(new_shape, dtype=np.float32)
        if len(x.shape) == 1:
            for i, v in enumerate(x):
                j = classes.get(v, -1)
                if j >= 0:
                    res[i, j] = 1.0
        elif len(x.shape) == 2:
            for a, row in enumerate(x):
                for i, v in enumerate(row):
                    j = classes.get(v, -1)
                    if j >= 0:
                        res[a, i, j] = 1.0
        else:
            raise RuntimeError(f"This operator is not implemented shape {x.shape}.")

        if not zeros:
            red = res.sum(axis=len(res.shape) - 1)
            # An empty input has nothing to verify (np.min would fail on it).
            if red.size > 0 and np.min(red) == 0:
                # Report at most six offending positions; argwhere handles
                # vectors and matrices alike.
                rows = []
                for position in np.argwhere(red == 0)[:6]:
                    where = tuple(int(p) for p in position)
                    rows.append(
                        {
                            "row": where[0] if len(where) == 1 else where,
                            "value": x[where],
                        }
                    )
                msg = "\n".join(str(_) for _ in rows)
                raise RuntimeError(
                    f"One observation did not have any defined category.\n"
                    f"classes: {classes}\nfirst rows:\n"
                    f"{msg}\nres:\n{res[:5]}\nx:\n{x[:5]}"
                )
        return (res,)

View File

@ -0,0 +1,12 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class Scaler(OpRunAiOnnxMl):
    """Runtime for operator *Scaler*: computes ``(x - offset) * scale``."""

    def _run(self, x, offset=None, scale=None):  # type: ignore
        centred = x - offset
        rescaled = centred * scale
        # The result is cast back to the dtype of the input.
        return (rescaled.astype(x.dtype),)

View File

@ -0,0 +1,334 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._common_classifier import (
compute_logistic,
compute_probit,
compute_softmax_zero,
logistic,
softmax,
softmax_zero,
)
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
from onnx.reference.ops.aionnxml.op_svm_helper import SVMCommon
def multiclass_probability(k, R):
    """Combine pairwise class probabilities into one distribution.

    The routine follows libsvm's ``multiclass_probability`` (method of Wu,
    Lin and Weng): it iteratively minimises ``P.T @ Q @ P`` subject to
    ``sum(P) == 1`` where ``Q[t, t] = sum(R[j, t] ** 2 for j != t)`` and
    ``Q[t, j] = -R[j, t] * R[t, j]`` for ``j != t``.

    Args:
        k: number of classes
        R: ``(k, k)`` array, ``R[i, j]`` being the pairwise probability of
            class ``i`` against class ``j``

    Returns:
        array of ``k`` probabilities
    """
    max_iter = max(100, k)
    Q = np.empty((k, k), dtype=R.dtype)
    Qp = np.empty((k,), dtype=R.dtype)
    P = np.empty((k,), dtype=R.dtype)
    eps = 0.005 / k

    for t in range(k):
        P[t] = 1.0 / k
        Q[t, t] = (R[:t, t] ** 2).sum()
        # The lower triangle mirrors the entries already computed.
        Q[t, :t] = Q[:t, t]

        Q[t, t] += (R[t + 1 :, t] ** 2).sum()
        # Element-wise product: each Q[t, j] only involves the pair (t, j).
        # A dot product here would give every entry the same summed value,
        # which is wrong as soon as k >= 3.
        Q[t, t + 1 :] = -R[t + 1 :, t] * R[t, t + 1 :]

    for _ in range(max_iter):
        # stopping condition, recalculate QP,pQP for numerical accuracy
        Qp[:] = Q @ P
        pQp = (P * Qp).sum()

        max_error = 0
        for t in range(k):
            error = np.abs(Qp[t] - pQp)
            if error > max_error:
                max_error = error
        if max_error < eps:
            break

        for t in range(k):
            diff = (-Qp[t] + pQp) / Q[t, t]
            P[t] += diff
            pQp = (pQp + diff * (diff * Q[t, t] + 2 * Qp[t])) / (1 + diff) ** 2
            # Renormalise so that the probabilities keep summing to one.
            P /= 1 + diff
            Qp[:] = (Qp + diff * Q[t, :]) / (1 + diff)

    return P
def sigmoid_probability(score, proba, probb):
    """Return ``1 - logistic(score * proba + probb)``."""
    # ref: https://github.com/arnaudsj/libsvm/blob/eaaefac5ebd32d0e07902e1ae740e038eaaf0826/svm.cpp#L1818
    affine = score * proba + probb
    return 1 - compute_logistic(affine)
def write_scores(n_classes, scores, post_transform, add_second_class):  # noqa: PLR0911
    """Apply the post-transform to the scores of one observation.

    With two scores or more, the transform is applied to the whole vector.
    With a single score, a second one may be added depending on
    *add_second_class*: ``[1 - s, s]`` for 0 or 1, ``[-s, s]`` (then
    transformed) for 2 or 3, and the score alone otherwise.

    Raises:
        NotImplementedError: *n_classes* is neither 1 nor at least 2.
    """
    if n_classes >= 2:
        if post_transform == "PROBIT":
            return np.array(
                [compute_probit(score) for score in scores], dtype=scores.dtype
            )
        if post_transform == "LOGISTIC":
            return logistic(scores)
        if post_transform == "SOFTMAX":
            return softmax(scores)
        if post_transform == "SOFTMAX_ZERO":
            return compute_softmax_zero(scores)
        return scores

    if n_classes != 1:
        raise NotImplementedError(f"n_classes={n_classes} not supported.")

    single = scores[0]
    if post_transform == "PROBIT":
        return np.array([compute_probit(single)], dtype=scores.dtype)
    if add_second_class in (0, 1):
        return np.array([1 - single, single], dtype=scores.dtype)
    if add_second_class not in (2, 3):
        return np.array([single], dtype=scores.dtype)

    if post_transform == "LOGISTIC":
        return np.array([logistic(-single), logistic(single)], dtype=scores.dtype)
    if post_transform == "SOFTMAX":
        return softmax(np.array([-single, single], dtype=scores.dtype))
    if post_transform == "SOFTMAX_ZERO":
        return softmax_zero(np.array([-single, single], dtype=scores.dtype))
    if post_transform == "PROBIT":
        # Kept from the original; PROBIT has already returned above.
        raise RuntimeError(
            f"post_transform={post_transform!r} not applicable here."
        )
    return np.array([-single, single], dtype=scores.dtype)
def set_score_svm(
    max_weight,
    maxclass,
    has_proba,
    weights_are_all_positive_,
    classlabels,
    posclass,
    negclass,
):
    """Pick the predicted label and tell how scores must be completed.

    Returns:
        tuple ``(label, write_additional_scores)`` where the second item is
        2 when there are exactly two class labels and -1 otherwise.
    """
    if len(classlabels) != 2:
        # Without exactly two labels the sign of the weight decides.
        chosen = posclass if max_weight > 0 else negclass
        return chosen, -1

    # Binary case: without probabilities and with non-negative weights, a
    # weight of at least 0.5 selects the second label; every other
    # combination ends up on the label of the winning class.
    if not has_proba and weights_are_all_positive_ and max_weight >= 0.5:
        return classlabels[1], 2
    return classlabels[maxclass], 2
class SVMClassifier(OpRunAiOnnxMl):
    """Runtime for operator *SVMClassifier*.

    ``_run`` stores an ``SVMCommon`` instance in ``self._svm``; the helper
    methods read the attributes and the kernel from it.
    """

    def _run_linear(self, X, coefs, class_count_, kernel_type_):
        """Score one observation against every class in linear mode.

        The score of class ``j`` is ``rho[0] + kernel_dot(X, coefs[j])``.
        """
        scores = []
        for j in range(class_count_):
            d = self._svm.kernel_dot(X, coefs[j], kernel_type_)
            score = self._svm.atts.rho[0] + d  # type: ignore
            scores.append(score)
        return np.array(scores, dtype=X.dtype)

    def _run_svm(
        self, X, sv, vector_count_, kernel_type_, class_count_, starting_vector_, coefs
    ):
        """Evaluate every pair of classes ``(i, j)``, ``i < j``, for one observation.

        Returns:
            tuple ``(votes, scores)``: the number of pairs won by each class
            and the decision value of each pair, in the order the pairs are
            visited.
        """
        evals = 0

        # Kernel between the observation and every support vector.
        kernels_list = [
            self._svm.kernel_dot(X, sv[j], kernel_type_) for j in range(vector_count_)
        ]
        kernels = np.array(kernels_list)

        votes = np.zeros((class_count_,), dtype=X.dtype)
        scores = []
        for i in range(class_count_):
            si_i = starting_vector_[i]
            class_i_sc = self._svm.atts.vectors_per_class[i]  # type: ignore

            for j in range(i + 1, class_count_):
                si_j = starting_vector_[j]
                class_j_sc = self._svm.atts.vectors_per_class[j]  # type: ignore

                # Contribution of the support vectors of class i ...
                s1 = np.dot(
                    coefs[j - 1, si_i : si_i + class_i_sc],
                    kernels[si_i : si_i + class_i_sc],
                )
                # ... and of the support vectors of class j.
                s2 = np.dot(
                    coefs[i, si_j : si_j + class_j_sc],
                    kernels[si_j : si_j + class_j_sc],
                )

                # ``evals`` counts the pairs seen so far and indexes ``rho``.
                s = self._svm.atts.rho[evals] + s1 + s2  # type: ignore
                scores.append(s)
                if s > 0:
                    votes[i] += 1
                else:
                    votes[j] += 1
                evals += 1
        return votes, np.array(scores, dtype=X.dtype)

    def _probabilities(self, scores, class_count_):
        """Turn the pairwise decision values into class probabilities.

        Every pairwise score goes through ``sigmoid_probability`` with the
        matching ``prob_a`` / ``prob_b``, is clipped to
        ``[1e-7, 1 - 1e-7]`` and the resulting matrix is passed to
        ``multiclass_probability``.
        """
        probsp2 = np.zeros((class_count_, class_count_), dtype=scores.dtype)

        index = 0
        for i in range(class_count_):
            for j in range(i + 1, class_count_):
                val1 = sigmoid_probability(
                    scores[index],
                    self._svm.atts.prob_a[index],  # type: ignore
                    self._svm.atts.prob_b[index],  # type: ignore
                )
                val2 = max(val1, 1.0e-7)
                val2 = min(val2, (1 - 1.0e-7))
                probsp2[i, j] = val2
                probsp2[j, i] = 1 - val2
                index += 1
        return multiclass_probability(class_count_, probsp2)

    def _compute_final_scores(
        self, votes, scores, weights_are_all_positive_, has_proba, classlabels_ints
    ):
        """Return the label and the post-transformed scores of one observation.

        The winning class is the one with the most votes when votes are
        available, otherwise the one with the highest score.
        """
        max_weight = 0
        if votes is not None and len(votes) > 0:
            max_class = np.argmax(votes)
            max_weight = votes[max_class]
        else:
            max_class = np.argmax(scores)
            max_weight = scores[max_class]

        write_additional_scores = -1
        if self._svm.atts.rho.size == 1:  # type: ignore
            # A single ``rho`` value: the label comes from ``set_score_svm``.
            label, write_additional_scores = set_score_svm(
                max_weight,
                max_class,
                has_proba,
                weights_are_all_positive_,
                classlabels_ints,
                1,
                0,
            )
        elif classlabels_ints is not None and len(classlabels_ints) > 0:
            label = classlabels_ints[max_class]
        else:
            label = max_class

        new_scores = write_scores(
            scores.size,
            scores,
            self._svm.atts.post_transform,
            write_additional_scores,  # type: ignore
        )
        return label, new_scores

    def _run(  # type: ignore
        self,
        X,
        classlabels_ints=None,
        classlabels_strings=None,
        coefficients=None,
        kernel_params=None,
        kernel_type=None,
        post_transform=None,
        prob_a=None,
        prob_b=None,
        rho=None,
        support_vectors=None,
        vectors_per_class=None,
    ):
        """Predict a label and the scores of every row of *X*.

        Returns:
            tuple ``(labels, scores)``; the labels are strings when
            ``classlabels_strings`` is not empty and int64 otherwise.
        """
        svm = SVMCommon(
            coefficients=coefficients,
            kernel_params=kernel_params,
            kernel_type=kernel_type,
            post_transform=post_transform,
            prob_a=prob_a,
            prob_b=prob_b,
            rho=rho,
            support_vectors=support_vectors,
            vectors_per_class=vectors_per_class,
        )
        # kept on the instance: the helper methods above read ``self._svm``
        self._svm = svm

        vector_count_ = 0
        class_count_ = max(len(classlabels_ints or classlabels_strings or []), 1)
        # Index of the first support vector of every class.
        starting_vector_ = []
        if svm.atts.vectors_per_class is not None:  # type: ignore
            for vc in svm.atts.vectors_per_class:  # type: ignore
                starting_vector_.append(vector_count_)
                vector_count_ += vc

        if vector_count_ > 0:
            # length of each support vector
            mode = "SVM_SVC"
            sv = svm.atts.support_vectors.reshape((vector_count_, -1))  # type: ignore
            kernel_type_ = svm.atts.kernel_type  # type: ignore
            coefs = svm.atts.coefficients.reshape((-1, vector_count_))  # type: ignore
        else:
            # liblinear mode
            mode = "SVM_LINEAR"
            kernel_type_ = "LINEAR"
            coefs = svm.atts.coefficients.reshape((class_count_, -1))  # type: ignore

        weights_are_all_positive_ = min(svm.atts.coefficients) >= 0  # type: ignore

        # SVM part
        if vector_count_ == 0 and mode == "SVM_LINEAR":
            res = np.empty((X.shape[0], class_count_), dtype=X.dtype)
            for n in range(X.shape[0]):
                scores = self._run_linear(X[n], coefs, class_count_, kernel_type_)
                res[n, :] = scores
            votes = None
        else:
            # One decision value per pair of classes.
            res = np.empty(
                (X.shape[0], class_count_ * (class_count_ - 1) // 2), dtype=X.dtype
            )
            votes = np.empty((X.shape[0], class_count_), dtype=X.dtype)
            for n in range(X.shape[0]):
                vote, scores = self._run_svm(
                    X[n],
                    sv,
                    vector_count_,
                    kernel_type_,
                    class_count_,
                    starting_vector_,
                    coefs,
                )
                res[n, :] = scores
                votes[n, :] = vote

        # proba
        if (
            svm.atts.prob_a is not None  # type: ignore
            and len(svm.atts.prob_a) > 0  # type: ignore
            and mode == "SVM_SVC"
        ):
            scores = np.empty((res.shape[0], class_count_), dtype=X.dtype)
            for n in range(scores.shape[0]):
                s = self._probabilities(res[n], class_count_)
                scores[n, :] = s
            has_proba = True
        else:
            scores = res
            has_proba = False

        # finalization
        final_scores = None
        labels = []
        for n in range(scores.shape[0]):
            label, new_scores = self._compute_final_scores(
                None if votes is None else votes[n],
                scores[n],
                weights_are_all_positive_,
                has_proba,
                classlabels_ints,
            )
            # The output width is only known once the first row is processed.
            if final_scores is None:
                final_scores = np.empty((X.shape[0], new_scores.size), dtype=X.dtype)
            final_scores[n, :] = new_scores
            labels.append(label)

        # labels
        if classlabels_strings is not None and len(classlabels_strings) > 0:
            return (np.array([classlabels_strings[i] for i in labels]), final_scores)
        return (np.array(labels, dtype=np.int64), final_scores)

View File

@ -0,0 +1,97 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from typing import Any
import numpy as np
class SVMAttributes:
    """Plain container holding the attributes of an SVM operator.

    Every attribute added through :meth:`add` becomes an instance attribute
    and its name is remembered so that ``str()`` can list it.
    """

    def __init__(self):
        # Names of the attributes registered through ``add``, in order.
        self._names = []

    def add(self, name: str, value: Any) -> None:
        """Store *value* under *name*.

        Lists are converted into numpy arrays: int64 for
        ``vectors_per_class``, float32 for everything else except
        ``kernel_params`` which stays a list.
        """
        if isinstance(value, list) and name != "kernel_params":
            if name == "vectors_per_class":
                value = np.array(value, dtype=np.int64)
            else:
                value = np.array(value, dtype=np.float32)
        setattr(self, name, value)
        # Record the name: it used to be forgotten, leaving ``__str__`` with
        # nothing to display.
        if name not in self._names:
            self._names.append(name)

    def __str__(self) -> str:
        rows = ["Attributes"]
        rows.extend(f" {name}={getattr(self, name)}" for name in self._names)
        return "\n".join(rows)
class SVMCommon:
    """Base class for SVM."""

    def __init__(self, **kwargs):  # type: ignore
        """Store every keyword argument as an attribute of ``self.atts``.

        ``kernel_params``, when given and not empty, provides ``gamma``,
        ``coef0`` and ``degree`` in that order; they default to zero.
        """
        self.atts = SVMAttributes()
        for name, value in kwargs.items():
            self.atts.add(name, value)

        # Tolerate callers which do not pass ``kernel_params`` at all.
        kernel_params = getattr(self.atts, "kernel_params", None)
        if kernel_params:
            self.gamma_ = kernel_params[0]
            self.coef0_ = kernel_params[1]
            self.degree_ = int(kernel_params[2])
        else:
            self.gamma_ = 0.0
            self.coef0_ = 0.0
            self.degree_ = 0

    def __str__(self) -> str:
        # The previous text was copied from the tree ensemble and read a
        # ``root_index`` attribute this class never defines.
        rows = [type(self).__name__, str(self.atts)]
        return "\n".join(rows)

    def kernel_dot(self, pA: np.ndarray, pB: np.ndarray, kernel: str) -> np.ndarray:
        """Evaluate the kernel named *kernel* (case-insensitive) on two vectors.

        Raises:
            ValueError: the kernel is not poly, sigmoid, rbf or linear.
        """
        k = kernel.lower()
        if k == "poly":
            s = np.dot(pA, pB)
            s = s * self.gamma_ + self.coef0_
            return s**self.degree_  # type: ignore
        if k == "sigmoid":
            s = np.dot(pA, pB)
            s = s * self.gamma_ + self.coef0_
            return np.tanh(s)  # type: ignore
        if k == "rbf":
            diff = pA - pB
            s = (diff * diff).sum()
            return np.exp(-self.gamma_ * s)  # type: ignore
        if k == "linear":
            return np.dot(pA, pB)  # type: ignore
        raise ValueError(f"Unexpected kernel={kernel!r}.")

    def run_reg(self, X: np.ndarray) -> np.ndarray:
        """Compute the regression output for every row of *X*.

        Returns:
            array of shape ``(X.shape[0], 1)``; with ``one_class`` set, the
            values are 1 or -1 depending on the sign of the score.
        """
        if self.atts.n_supports > 0:  # type: ignore
            # length of each support vector
            mode_ = "SVM_SVC"
            kernel_type_ = self.atts.kernel_type  # type: ignore
            sv = self.atts.support_vectors.reshape((self.atts.n_supports, -1))  # type: ignore
        else:
            mode_ = "SVM_LINEAR"
            kernel_type_ = "LINEAR"

        z = np.empty((X.shape[0], 1), dtype=X.dtype)
        for n in range(X.shape[0]):
            s = 0.0

            if mode_ == "SVM_SVC":
                for j in range(self.atts.n_supports):  # type: ignore
                    d = self.kernel_dot(X[n], sv[j], kernel_type_)
                    s += self.atts.coefficients[j] * d  # type: ignore
                s += self.atts.rho[0]  # type: ignore
            elif mode_ == "SVM_LINEAR":
                s = self.kernel_dot(X[n], self.atts.coefficients, kernel_type_)  # type: ignore
                s += self.atts.rho[0]  # type: ignore

            if self.atts.one_class:  # type: ignore
                z[n, 0] = 1 if s > 0 else -1
            else:
                z[n, 0] = s
        return z

View File

@ -0,0 +1,43 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
from onnx.reference.ops.aionnxml.op_svm_helper import SVMCommon
class SVMRegressor(OpRunAiOnnxMl):
    """The class only implements `POST_TRANSFORM="NONE"`."""

    def _run(  # type: ignore
        self,
        X,
        coefficients=None,
        kernel_params=None,
        kernel_type=None,
        n_targets=None,
        n_supports=None,
        one_class=None,
        post_transform=None,
        rho=None,
        support_vectors=None,
    ):
        """Run the SVM regression on *X* and return the result in a tuple.

        Raises:
            NotImplementedError: *post_transform* is neither ``None`` nor
                ``"NONE"``; the check happens after the computation.
        """
        attributes = {
            "coefficients": coefficients,
            "kernel_params": kernel_params,
            "kernel_type": kernel_type,
            "n_targets": n_targets,
            "n_supports": n_supports,
            "one_class": one_class,
            "post_transform": post_transform,
            "rho": rho,
            "support_vectors": support_vectors,
        }
        # adding an attribute for debugging purpose
        self._svm = svm = SVMCommon(**attributes)
        res = svm.run_reg(X)

        if post_transform not in (None, "NONE"):
            raise NotImplementedError(f"post_transform={post_transform!r} not implemented.")
        return (res,)

View File

@ -0,0 +1,257 @@
from __future__ import annotations
from enum import IntEnum
from typing import Callable
import numpy as np
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
class AggregationFunction(IntEnum):
    """Integer codes naming an aggregation: average, sum, minimum or maximum.

    ``TreeEnsemble._run`` uses ``SUM`` as the default value of its
    ``aggregate_function`` parameter.
    """

    # NOTE(review): presumably how the outputs of the trees are combined;
    # the consuming code is not visible here, confirm before relying on it.
    AVERAGE = 0
    SUM = 1
    MIN = 2
    MAX = 3
class PostTransform(IntEnum):
    """Integer codes naming a post-transform.

    ``TreeEnsemble._run`` uses ``NONE`` as the default value of its
    ``post_transform`` parameter.
    """

    # NOTE(review): the names mirror the post_transform strings used by the
    # classifiers of this package; confirm the mapping against the caller.
    NONE = 0
    SOFTMAX = 1
    LOGISTIC = 2
    SOFTMAX_ZERO = 3
    PROBIT = 4
class Mode(IntEnum):
    """Integer codes of the comparison a ``Node`` applies to a feature value."""

    LEQ = 0  # feature <= split value
    LT = 1  # feature < split value
    GTE = 2  # feature >= split value
    GT = 3  # feature > split value
    EQ = 4  # feature == split value
    NEQ = 5  # feature != split value
    MEMBER = 6  # feature is contained in the split value
class Leaf:
    """Terminal element of a tree: a weight and the identifier of its target."""

    def __init__(self, weight: float, target_id: int) -> None:
        self.weight = weight
        self.target_id = target_id

    def predict(self, x: np.ndarray) -> np.ndarray:  # noqa: ARG002
        """Return ``[weight, target_id]`` as an array; *x* is ignored."""
        return np.array([self.weight, self.target_id])

    def _print(self, prefix: list, indent: int = 0) -> None:
        """Append the description of the leaf to the list *prefix*."""
        padding = " " * indent
        prefix.append(
            padding + f"Leaf WEIGHT: {self.weight}, TARGET: {self.target_id}\n"
        )

    def __repr__(self) -> str:
        pieces: list = []
        self._print(pieces)
        return "".join(pieces)
class Node:
    """Internal decision node routing a sample to one of two sub-trees.

    `true_branch` and `false_branch` are not set by the constructor; the
    code building the tree assigns them afterwards.
    """

    # Predicate evaluated on one sample (a 1-D row); a truthy result selects
    # `true_branch`, a falsy one `false_branch`.
    compare: Callable[[np.ndarray], bool]
    true_branch: Node | Leaf
    false_branch: Node | Leaf
    feature: int

    def __init__(
        self,
        mode: Mode,
        value: float | set[float],
        feature: int,
        missing_tracks_true: bool,
    ) -> None:
        """Build the predicate for this node.

        Args:
            mode: comparison to perform (compared with `==` against `Mode`
                members, so plain integers are accepted as well).
            value: split threshold, or the set of accepted values when
                `mode` is `Mode.MEMBER`.
            feature: index of the feature read from the sample.
            missing_tracks_true: when truthy, a NaN feature value selects
                the true branch even if the comparison itself fails.

        Raises:
            ValueError: if `mode` matches no member of `Mode`.
        """
        tests = (
            (Mode.LEQ, lambda v: v <= value),
            (Mode.LT, lambda v: v < value),
            (Mode.GTE, lambda v: v >= value),
            (Mode.GT, lambda v: v > value),
            (Mode.EQ, lambda v: v == value),
            (Mode.NEQ, lambda v: v != value),
            (Mode.MEMBER, lambda v: v in value),
        )
        test = next((fn for candidate, fn in tests if mode == candidate), None)
        if test is None:
            # Fail at construction rather than leaving `compare` undefined
            # and crashing with an AttributeError at prediction time.
            raise ValueError(f"Unsupported node mode {mode!r}.")

        def compare(x: np.ndarray) -> bool:
            item = x[feature].item()
            return test(item) or (missing_tracks_true and np.isnan(item))

        self.compare = compare
        self.mode = mode
        self.value = value
        self.feature = feature

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Descend into the branch selected by `compare` and return its prediction."""
        branch = self.true_branch if self.compare(x) else self.false_branch
        return branch.predict(x)

    def _print(self, prefix: list, indent: int = 0) -> None:
        """Append a description of this node and of both sub-trees to `prefix`."""
        prefix.append(
            " " * indent
            + f"Node CMP: {self.mode}, SPLIT: {self.value}, FEATURE: {self.feature}\n"
        )
        self.true_branch._print(prefix, indent + 1)
        self.false_branch._print(prefix, indent + 1)

    def __repr__(self) -> str:
        prefix: list = []
        self._print(prefix)
        return "".join(prefix)
class TreeEnsemble(OpRunAiOnnxMl):
    """Reference evaluation of an ensemble of decision trees.

    Every tree is materialised as `Node`/`Leaf` objects, each sample is
    routed through every tree, and the leaf weights reached are combined
    per target with `aggregate_function`.
    """

    def _run(
        self,
        X,
        nodes_splits,
        nodes_featureids,
        nodes_modes,
        nodes_truenodeids,
        nodes_falsenodeids,
        nodes_trueleafs,
        nodes_falseleafs,
        leaf_targetids,
        leaf_weights,
        tree_roots,
        post_transform=PostTransform.NONE,  # noqa: ARG002
        aggregate_function=AggregationFunction.SUM,
        nodes_hitrates=None,  # noqa: ARG002
        nodes_missing_value_tracks_true=None,
        membership_values=None,
        n_targets=None,
    ):
        """Evaluate the ensemble on `X` and return `(result,)`.

        `X` is processed row by row (`axis=1`), and `result` has shape
        `(len(X), n_targets)`, so `n_targets` must be provided.
        `membership_values` lists the members of every `Mode.MEMBER` node,
        each list terminated by a NaN; the lists are consumed in the order
        the nodes are visited while building the trees. `post_transform`
        and `nodes_hitrates` are accepted but not used.

        Raises:
            ValueError: if set-membership nodes exist but the number of NaN
                terminators in `membership_values` does not match them.
            NotImplementedError: for an unknown `aggregate_function`.
        """
        if membership_values is None:
            # assert that no set membership ever appears
            if any(mode == Mode.MEMBER for mode in nodes_modes):
                raise ValueError(
                    "Cannot have set membership node without specifying set members"
                )
        elif np.isnan(membership_values).sum() != sum(
            int(mode == Mode.MEMBER) for mode in nodes_modes
        ):
            raise ValueError(
                "Must specify membership values for all set membership nodes"
            )

        # Build each tree in the ensemble. The tree structure is implicitly
        # defined by following the true and false indices in
        # `nodes_truenodeids` and `nodes_falsenodeids` to the leaves.
        set_membership_iter = (
            iter(membership_values) if membership_values is not None else None
        )

        def missing_tracks_true(node_index):
            if nodes_missing_value_tracks_true is None:
                return False
            return nodes_missing_value_tracks_true[node_index]

        def build_node(current_node_index, is_leaf) -> Node | Leaf:
            if is_leaf:
                return Leaf(
                    leaf_weights[current_node_index], leaf_targetids[current_node_index]
                )

            if nodes_modes[current_node_index] == Mode.MEMBER:
                # Parse the next NaN-terminated run of set members. Only NaN
                # ends the run: testing the member's truthiness as well would
                # stop at a legitimate member equal to 0 and misalign every
                # following set.
                split = set()
                while not np.isnan(set_member := next(set_membership_iter)):
                    split.add(set_member)
            else:
                split = nodes_splits[current_node_index]

            node = Node(
                nodes_modes[current_node_index],
                split,
                nodes_featureids[current_node_index],
                missing_tracks_true(current_node_index),
            )

            # recurse true and false branches
            node.true_branch = build_node(
                nodes_truenodeids[current_node_index],
                nodes_trueleafs[current_node_index],
            )
            node.false_branch = build_node(
                nodes_falsenodeids[current_node_index],
                nodes_falseleafs[current_node_index],
            )
            return node

        trees = []
        for root_index in tree_roots:
            # degenerate case (tree == leaf)
            is_leaf = (
                nodes_trueleafs[root_index]
                and nodes_falseleafs[root_index]
                and nodes_truenodeids[root_index] == nodes_falsenodeids[root_index]
            )
            trees.append(build_node(root_index, is_leaf))

        # predict each sample through tree; every prediction is a
        # (weight, target id) pair, split into two aligned matrices below
        raw_values = [
            np.apply_along_axis(tree.predict, axis=1, arr=X) for tree in trees
        ]
        weights, target_ids = zip(*[np.split(x, 2, axis=1) for x in raw_values])
        weights = np.concatenate(weights, axis=1)
        target_ids = np.concatenate(target_ids, axis=1).astype(np.int64)

        # The initial value is the neutral element of the aggregation.
        if aggregate_function in (
            AggregationFunction.SUM,
            AggregationFunction.AVERAGE,
        ):
            result = np.zeros((len(X), n_targets), dtype=X.dtype)
        elif aggregate_function == AggregationFunction.MIN:
            result = np.full((len(X), n_targets), np.finfo(X.dtype).max)
        elif aggregate_function == AggregationFunction.MAX:
            result = np.full((len(X), n_targets), np.finfo(X.dtype).min)
        else:
            raise NotImplementedError(
                f"aggregate_transform={aggregate_function!r} not supported yet."
            )

        for batch_num, (w, t) in enumerate(zip(weights, target_ids)):
            weight = w.reshape(-1)
            target_id = t.reshape(-1)
            if aggregate_function == AggregationFunction.SUM:
                for value, tid in zip(weight, target_id):
                    result[batch_num, tid] += value
            elif aggregate_function == AggregationFunction.AVERAGE:
                for value, tid in zip(weight, target_id):
                    result[batch_num, tid] += value / len(trees)
            elif aggregate_function == AggregationFunction.MIN:
                for value, tid in zip(weight, target_id):
                    result[batch_num, tid] = min(result[batch_num, tid], value)
            elif aggregate_function == AggregationFunction.MAX:
                for value, tid in zip(weight, target_id):
                    result[batch_num, tid] = max(result[batch_num, tid], value)
        return (result,)

View File

@ -0,0 +1,132 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._common_classifier import (
logistic,
probit,
softmax,
softmax_zero,
)
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
from onnx.reference.ops.aionnxml.op_tree_ensemble_helper import TreeEnsemble
class TreeEnsembleClassifier(OpRunAiOnnxMl):
    """Reference tree-ensemble classifier returning labels and class scores."""

    def _run(  # type: ignore
        self,
        X,
        base_values=None,
        base_values_as_tensor=None,
        class_ids=None,
        class_nodeids=None,
        class_treeids=None,
        class_weights=None,
        class_weights_as_tensor=None,
        classlabels_int64s=None,
        classlabels_strings=None,
        nodes_falsenodeids=None,
        nodes_featureids=None,
        nodes_hitrates=None,
        nodes_hitrates_as_tensor=None,
        nodes_missing_value_tracks_true=None,
        nodes_modes=None,
        nodes_nodeids=None,
        nodes_treeids=None,
        nodes_truenodeids=None,
        nodes_values=None,
        nodes_values_as_tensor=None,
        post_transform=None,
    ):
        """Score `X` with the ensemble and return `(labels, scores)`.

        The weights attached to the leaf each sample reaches in every tree
        are added, per class id, on top of `base_values`; the scores are
        then post-processed and the label is the arg-max of each row.

        Raises:
            NotImplementedError: for a single class label other than the
                integer `1`.
        """
        ensemble = TreeEnsemble(
            base_values=base_values,
            base_values_as_tensor=base_values_as_tensor,
            nodes_falsenodeids=nodes_falsenodeids,
            nodes_featureids=nodes_featureids,
            nodes_hitrates=nodes_hitrates,
            nodes_hitrates_as_tensor=nodes_hitrates_as_tensor,
            nodes_missing_value_tracks_true=nodes_missing_value_tracks_true,
            nodes_modes=nodes_modes,
            nodes_nodeids=nodes_nodeids,
            nodes_treeids=nodes_treeids,
            nodes_truenodeids=nodes_truenodeids,
            nodes_values=nodes_values,
            nodes_values_as_tensor=nodes_values_as_tensor,
            class_weights=class_weights,
            class_weights_as_tensor=class_weights_as_tensor,
        )
        # unused unless for debugging purposes
        self._tree = ensemble

        if X.dtype not in (np.float32, np.float64):
            X = X.astype(np.float32)
        leaves_index = ensemble.leave_index_tree(X)

        n_int_labels = len(classlabels_int64s or [])
        n_str_labels = len(classlabels_strings or [])
        n_classes = max(n_int_labels, n_str_labels)
        res = np.empty((leaves_index.shape[0], n_classes), dtype=np.float32)
        base = ensemble.atts.base_values  # type: ignore
        res[:, :] = 0 if base is None else np.array(base).reshape((1, -1))

        # (tree id, node id) -> positions in class_ids / class_weights
        class_index = {}  # type: ignore
        for position, (tid, nid) in enumerate(zip(class_treeids, class_nodeids)):
            class_index.setdefault((tid, nid), []).append(position)

        for row, reached_leaves in enumerate(leaves_index):
            weight_groups = [
                class_index[nodes_treeids[leaf], nodes_nodeids[leaf]]
                for leaf in reached_leaves
            ]
            for group in weight_groups:
                for k in group:
                    res[row, class_ids[k]] += ensemble.atts.class_weights[k]  # type: ignore

        # post_transform
        binary = len(set(class_ids)) == 1
        classes = classlabels_int64s or classlabels_strings

        def identity(x):
            return x

        post_function = {
            None: identity,
            "NONE": identity,
            "LOGISTIC": logistic,
            "SOFTMAX": softmax,
            "SOFTMAX_ZERO": softmax_zero,
            "PROBIT": probit,
        }
        if binary:
            # All weights target one class id: the score of the second class
            # is that value and the first column is derived from it.
            if res.shape[1] == len(classes) == 1:
                widened = np.zeros((res.shape[0], 2), res.dtype)
                widened[:, 1] = res[:, 0]
                res = widened
            else:
                res[:, 1] = res[:, 0]
            if post_transform in (None, "NONE", "PROBIT"):
                res[:, 0] = 1 - res[:, 1]
            else:
                res[:, 0] = -res[:, 1]
        new_scores = post_function[post_transform](res)  # type: ignore
        labels = np.argmax(new_scores, axis=1)

        # labels
        if classlabels_int64s is not None:
            if len(classlabels_int64s) != 1:
                labels = np.array(
                    [classlabels_int64s[idx] for idx in labels], dtype=np.int64
                )
            elif classlabels_int64s[0] == 1:
                labels = np.array(
                    [1 if idx == 1 else 0 for idx in labels], dtype=np.int64
                )
            else:
                raise NotImplementedError(
                    f"classlabels_int64s={classlabels_int64s}, not supported."
                )
        elif classlabels_strings is not None:
            if len(classlabels_strings) == 1:
                raise NotImplementedError(
                    f"classlabels_strings={classlabels_strings}, not supported."
                )
            labels = np.array([classlabels_strings[idx] for idx in labels])

        return labels, new_scores

View File

@ -0,0 +1,105 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
class TreeEnsembleAttributes:
    """Bag of operator attributes stored as instance attributes."""

    def __init__(self):
        # Names registered through `add`, excluding the `*_as_tensor` ones.
        self._names = []

    def add(self, name, value):
        """Store `value` under `name`, turning some lists into arrays.

        Lists of the float-valued attributes become float32 arrays, lists
        whose name ends with `as_tensor` become arrays of inferred dtype,
        and anything else is stored as given.
        """
        if not name.endswith("_as_tensor"):
            self._names.append(name)

        float_names = ("base_values", "class_weights", "nodes_values", "nodes_hitrates")
        if isinstance(value, list):
            if name in float_names:
                value = np.array(value, dtype=np.float32)
            elif name.endswith("as_tensor"):
                value = np.array(value)
        setattr(self, name, value)

    def __str__(self):
        lines = ["Attributes"]
        for registered in self._names:
            shown = (
                registered.replace("_as_tensor", "")
                if registered.endswith("_as_tensor")
                else registered
            )
            lines.append(f" {shown}={getattr(self, shown)}")
        return "\n".join(lines)
class TreeEnsemble:
    """Index over the flat node arrays of a tree ensemble.

    The attributes are kept in `self.atts`. `root_index` maps a tree id to
    the position of the first node listed for that tree, and `node_index`
    maps `(tree id, node id)` to the node's position in the flat arrays.
    """

    def __init__(self, **kwargs):
        self.atts = TreeEnsembleAttributes()
        for name, value in kwargs.items():
            self.atts.add(name, value)

        self.tree_ids = sorted(set(self.atts.nodes_treeids))  # type: ignore
        # Start from an out-of-range position and keep the smallest one seen.
        self.root_index = {
            tid: len(self.atts.nodes_treeids) for tid in self.tree_ids  # type: ignore
        }
        for index, tree_id in enumerate(self.atts.nodes_treeids):  # type: ignore
            self.root_index[tree_id] = min(self.root_index[tree_id], index)
        self.node_index = {
            (tid, nid): i
            for i, (tid, nid) in enumerate(
                zip(self.atts.nodes_treeids, self.atts.nodes_nodeids)  # type: ignore
            )
        }

    def __str__(self) -> str:
        rows = ["TreeEnsemble", f"root_index={self.root_index}", str(self.atts)]
        return "\n".join(rows)

    def leaf_index_tree(self, X: np.ndarray, tree_id: int) -> int:
        """Return the flat index of the leaf that sample `X` reaches in one tree.

        A NaN feature follows the true branch only when
        `nodes_missing_value_tracks_true` is at least 1 for the node; when
        that attribute is missing or `None` it is treated as 0 everywhere.

        Raises:
            ValueError: if a non-leaf node carries an unknown mode.
        """
        atts = self.atts
        index = self.root_index[tree_id]
        while atts.nodes_modes[index] != "LEAF":  # type: ignore
            x = X[atts.nodes_featureids[index]]  # type: ignore
            if np.isnan(x):
                # Looked up lazily so ensembles built without the attribute
                # keep working on inputs that contain no NaN.
                tracks_true = getattr(atts, "nodes_missing_value_tracks_true", None)
                r = tracks_true is not None and tracks_true[index] >= 1
            else:
                rule = atts.nodes_modes[index]  # type: ignore
                th = atts.nodes_values[index]  # type: ignore
                if rule == "BRANCH_LEQ":
                    r = x <= th
                elif rule == "BRANCH_LT":
                    r = x < th
                elif rule == "BRANCH_GTE":
                    r = x >= th
                elif rule == "BRANCH_GT":
                    r = x > th
                elif rule == "BRANCH_EQ":
                    r = x == th
                elif rule == "BRANCH_NEQ":
                    r = x != th
                else:
                    raise ValueError(
                        f"Unexpected rule {rule!r} for node index {index}."
                    )
            nid = (
                atts.nodes_truenodeids[index]  # type: ignore
                if r
                else atts.nodes_falsenodeids[index]  # type: ignore
            )
            index = self.node_index[tree_id, nid]
        return index

    def leave_index_tree(self, X: np.ndarray) -> np.ndarray:
        """Return, for every row of `X`, the leaf index reached in every tree.

        A 1-D `X` is treated as a single sample. Columns of the result
        follow the sorted tree ids.
        """
        if len(X.shape) == 1:
            X = X.reshape((1, -1))
        return np.array(
            [
                [self.leaf_index_tree(row, tree_id) for tree_id in self.tree_ids]
                for row in X
            ]
        )

View File

@ -0,0 +1,107 @@
# Copyright (c) ONNX Project Contributors
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numpy as np
from onnx.reference.ops.aionnxml._op_run_aionnxml import OpRunAiOnnxMl
from onnx.reference.ops.aionnxml.op_tree_ensemble_helper import TreeEnsemble
class TreeEnsembleRegressor(OpRunAiOnnxMl):
    """Reference tree-ensemble regressor.

    `nodes_hitrates` and `nodes_hitrates_as_tensor` are not used.
    """

    def _run(  # type: ignore
        self,
        X,
        aggregate_function=None,
        base_values=None,
        base_values_as_tensor=None,
        n_targets=None,
        nodes_falsenodeids=None,
        nodes_featureids=None,
        nodes_hitrates=None,
        nodes_hitrates_as_tensor=None,
        nodes_missing_value_tracks_true=None,
        nodes_modes=None,
        nodes_nodeids=None,
        nodes_treeids=None,
        nodes_truenodeids=None,
        nodes_values=None,
        nodes_values_as_tensor=None,
        post_transform=None,
        target_ids=None,
        target_nodeids=None,
        target_treeids=None,
        target_weights=None,
        target_weights_as_tensor=None,
    ):
        """Evaluate the ensemble on `X` and return `(scores,)`.

        For every sample, the weights attached to the leaf reached in each
        tree are combined per target with `aggregate_function`
        (`"SUM"`, `"AVERAGE"`, `"MIN"` or `"MAX"`; `None` means `"SUM"`),
        then `base_values` is added. The result has shape
        `(number of samples, n_targets)`; a non-float `X` is converted to
        float32 first so the scores are always floating point.

        Raises:
            NotImplementedError: for an unknown `aggregate_function`, or a
                `post_transform` other than `None`/`"NONE"`.
        """
        # Integer inputs would give an integer accumulator: weights would be
        # truncated and `np.finfo` / in-place division would fail.
        if X.dtype not in (np.float32, np.float64):
            X = X.astype(np.float32)
        if aggregate_function is None:
            # default of the operator attribute
            aggregate_function = "SUM"

        tr = TreeEnsemble(
            base_values=base_values,
            base_values_as_tensor=base_values_as_tensor,
            nodes_falsenodeids=nodes_falsenodeids,
            nodes_featureids=nodes_featureids,
            nodes_hitrates=nodes_hitrates,
            nodes_hitrates_as_tensor=nodes_hitrates_as_tensor,
            nodes_missing_value_tracks_true=nodes_missing_value_tracks_true,
            nodes_modes=nodes_modes,
            nodes_nodeids=nodes_nodeids,
            nodes_treeids=nodes_treeids,
            nodes_truenodeids=nodes_truenodeids,
            nodes_values=nodes_values,
            nodes_values_as_tensor=nodes_values_as_tensor,
            target_weights=target_weights,
            target_weights_as_tensor=target_weights_as_tensor,
        )
        # unused unless for debugging purposes
        self._tree = tr

        leaves_index = tr.leave_index_tree(X)
        res = np.zeros((leaves_index.shape[0], n_targets), dtype=X.dtype)
        n_trees = len(set(tr.atts.nodes_treeids))  # type: ignore

        # (tree id, node id) -> positions in target_ids / target_weights
        target_index = {}  # type: ignore
        for position, (tid, nid) in enumerate(zip(target_treeids, target_nodeids)):
            target_index.setdefault((tid, nid), []).append(position)

        for row, reached_leaves in enumerate(leaves_index):
            t_index = [
                target_index[nodes_treeids[leaf], nodes_nodeids[leaf]]
                for leaf in reached_leaves
            ]
            if aggregate_function in ("SUM", "AVERAGE"):
                for its in t_index:
                    for it in its:
                        res[row, target_ids[it]] += tr.atts.target_weights[it]  # type: ignore
            elif aggregate_function == "MIN":
                res[row, :] = np.finfo(res.dtype).max
                for its in t_index:
                    for it in its:
                        res[row, target_ids[it]] = min(
                            res[row, target_ids[it]],
                            tr.atts.target_weights[it],  # type: ignore
                        )
            elif aggregate_function == "MAX":
                res[row, :] = np.finfo(res.dtype).min
                for its in t_index:
                    for it in its:
                        res[row, target_ids[it]] = max(
                            res[row, target_ids[it]],
                            tr.atts.target_weights[it],  # type: ignore
                        )
            else:
                raise NotImplementedError(
                    f"aggregate_transform={aggregate_function!r} not supported yet."
                )

        if aggregate_function == "AVERAGE":
            res /= n_trees

        # Convention is to add base_values after aggregate function
        if base_values is not None:
            res[:, :] += np.array(base_values).reshape((1, -1))

        if post_transform in (None, "NONE"):
            return (res,)
        raise NotImplementedError(f"post_transform={post_transform!r} not implemented.")