I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

File diff suppressed because it is too large

@@ -0,0 +1,74 @@
# This yaml file contains all the possible tags that can be defined in `tags` in `native_functions.yaml`
- tag: inplace_view
desc: |
This tag indicates if an operator *only* modifies the tensor metadata
- tag: pt2_compliant_tag
desc: |
This tag indicates if the operator is guaranteed to
work with the PT2 compilation APIs (torch.compile,
torch.export, etc). If you add this tag to an
operator, please use
`torch.testing._internal.optest.opcheck` to test that
the operator has been registered correctly and
works with torch.compile
- tag: view_copy
desc: |
This tag indicates operators that are *_copy* variants
of view/aliasing operators. If an operator has a view_copy tag,
then it should have the name {op}_copy, where {op} is a view operator.
- tag: dynamic_output_shape
desc: |
This tag indicates if an operator's output's shape depends on input Tensor
data.
- tag: data_dependent_output
desc: |
Operator has a non-Tensor output whose value is dependent on the data
of Tensor inputs. Among other things, this implies that this operator
cannot be run with meta tensor (since data is not available), nor
can it be symbolically traced.
- tag: generated
desc: |
This tag indicates that the operator doesn't have an explicit entry in
native_functions.yaml, and instead was generated automatically by the codegen.
- tag: nondeterministic_seeded
desc: |
This tag indicates if an operator is nondeterministically seeded
(i.e., is random) such that the operator intentionally produces
different results when run twice on the same inputs, but this randomness
is controlled by a Generator which, if reseeded, would give you the
same result.
- tag: nondeterministic_bitwise
desc: |
This tag indicates if an operator doesn't guarantee bitwise equivalence
across different runs of an operator with identical inputs.
- tag: needs_fixed_stride_order
desc: |
This tag indicates that the operator should be passed Tensors following
the same stride permutation as observed in eager when compiled in inductor.
Only one of {needs_fixed_stride_order, flexible_layout} can apply; if
multiple are assigned then we assume the most restrictive one.
- tag: flexible_layout
desc: |
This tag indicates that the custom operator can accept inputs with varying
strides/storage_offset and that when compiled, Inductor is allowed to change
the strides/storage_offset of inputs to the custom operator.
Only one of {needs_fixed_stride_order, flexible_layout} can apply; if
multiple are assigned then we assume the most restrictive one.
# NOTE [Core ATen Ops]
- tag: core
desc: |
Core aten ops is a subset of aten ops that remains after aten-to-aten decomposition and
the functionalization pass. Core aten ops are fully functional and adhere to single static
assignment (SSA): this implies there will be no `inplace` or `_out` variants in this opset.
This opset is designed to serve as the functional IR to interface with compiler backends.
In contrast to primTorch, the core aten opset doesn't decompose ops into explicit
type promotion and broadcasting ops.
Core aten ops is also effectively the opset produced by torchdynamo.export(aten_graph=True),
and thus can be used as an opset for export purposes.
- tag: pointwise
desc: |
Pointwise operators are operators where each element of the output is computed only by accessing
the corresponding element of all the broadcasted inputs. The output shape will be the broadcasted
shape of the inputs.
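As a hedged illustration of how these tags are consumed downstream (assuming the generated at::Tag enum and the tag-accepting overload of torch::Library::def; the mylib::my_sin operator is hypothetical), a custom operator can attach a tag at registration time:

#include <torch/library.h>

// Hypothetical extension library; the tag value mirrors an entry in this tags.yaml.
TORCH_LIBRARY(mylib, m) {
  m.def("my_sin(Tensor x) -> Tensor", {at::Tag::pt2_compliant_tag});
}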

@@ -0,0 +1,36 @@
#include <ATen/core/ATenOpList.h>
#include <string>
#include <cstring>
#include <utility>
#include <unordered_set>
#include <ATen/core/operator_name.h>
// ${generated_comment}
namespace at {
namespace {
struct OpNameEquals final {
bool operator()(const std::pair<const char*, const char*>& lhs, const std::pair<const char*, const char*>& rhs) const {
return 0 == strcmp(lhs.first, rhs.first) && 0 == strcmp(lhs.second, rhs.second);
}
};
struct OpNameHash final {
size_t operator()(const std::pair<const char*, const char*>& p) const {
// use std::hash<std::string> because std::hash<const char*> would hash pointers and not pointed-to strings
return std::hash<std::string>()(p.first) ^ (~ std::hash<std::string>()(p.second));
}
};
}
bool is_custom_op(const c10::OperatorName& opName) {
static std::unordered_set<std::pair<const char*, const char*>, OpNameHash, OpNameEquals> ops {
${aten_ops}
{"", ""}
};
return ops.count(std::make_pair(
opName.name.c_str(), opName.overload_name.c_str())) == 0;
}
}
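A minimal usage sketch of the generated is_custom_op helper (assuming the generated table stores fully qualified names such as "aten::add"; mylib::my_op is a hypothetical extension op):

#include <ATen/core/ATenOpList.h>
#include <ATen/core/operator_name.h>

bool example() {
  c10::OperatorName aten_add{"aten::add", "Tensor"};  // present in the generated table
  c10::OperatorName ext_op{"mylib::my_op", ""};       // absent, hence reported as custom
  return !at::is_custom_op(aten_add) && at::is_custom_op(ext_op);
}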

@@ -0,0 +1,73 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include <ATen/InferSize.h>
#include <ATen/Tensor.h>
#include <ATen/native/Resize.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
#include <ATen/ops/clone.h>
$ops_headers
#endif
namespace at {
namespace native {
// This file contains a number of kernels for aten functions that are fully code-generated.
// TODO: rename this file to something more generic.
namespace {
at::Tensor clone_arg(const at::Tensor& t) {
return t.clone();
}
std::vector<at::Tensor> clone_arg(const at::TensorList& t_list) {
std::vector<at::Tensor> out(t_list.size());
for (const auto& i : c10::irange(t_list.size())) {
out[i] = t_list[i].clone();
}
return out;
}
// duped with gen_resize_out_helper from structured kernels
void copy_arg(const at::Tensor& dst, const at::Tensor& src) {
TORCH_CHECK(src.dtype() == dst.dtype(),
"Expected out tensor to have dtype ", src.dtype(), ", but got ", dst.dtype(), " instead");
TORCH_CHECK(src.device() == dst.device(),
"Expected out tensor to have device ", src.device(), ", but got ", dst.device(), " instead");
dst.copy_(src);
}
void copy_arg(const at::TensorList& dst, const at::TensorList& src) {
TORCH_INTERNAL_ASSERT(dst.size() == src.size());
for (const auto& i : c10::irange(dst.size())) {
copy_arg(dst[i], src[i]);
}
}
// TODO: this doesn't handle restriding empty tensors correctly; see
// gen_resize_out_helper for the correct algorithm
void resize_out_helper(const at::Tensor& dst, const at::Tensor& src) {
at::native::resize_output(dst, src.sizes());
}
void resize_out_helper(const at::TensorList& dst, const at::TensorList& src) {
TORCH_INTERNAL_ASSERT(dst.size() == src.size());
for (const auto& i : c10::irange(dst.size())) {
at::native::resize_output(dst[i], src[i].sizes());
}
}
}
${CompositeViewCopyKernel_Definitions}
${GeneratedCompositeFunctional_Definitions}
${GeneratedCompositeOut_Definitions}
} // namespace native
} // namespace at

@@ -0,0 +1,23 @@
#pragma once
// ${generated_comment}
// NB: The implementing C++ file is RegisterDispatchKey.cpp
// The only #includes we need are for custom classes that have defaults in the C++ API
#include <c10/core/MemoryFormat.h>
#include <c10/core/Scalar.h>
#include <ATen/core/Reduction.h>
// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>
namespace at {
namespace ${dispatch_namespace} {
${dispatch_namespaced_declarations}
} // namespace ${dispatch_namespace}
} // namespace at

@@ -0,0 +1,29 @@
#include <ATen/core/TensorBody.h>
// TODO Undo all logic introduced for Note [Avoiding Include Cycles In Static Dispatch]
// Code introduced to avoid cyclic dependency in static dispatch is no longer
// needed as static dispatch logic is moved from TensorBody.h, which caused cycles in the first place,
// to Operators.cpp for supporting multiple backends with multiple kernels.
//
// Note [Avoiding Include Cycles In Static Dispatch]
// In order to avoid #include cycles in the static dispatch build, we've carefully split out
// the static function definition files into {DispatchKey}Functions.h and {DispatchKey}Functions_inl.h.
//
// Without this split, the include cycle looks like TensorBody.h -> CPUFunctions.h -> TensorBody.h.
// - TensorBody.h #includes CPUFunctions.h in the static dispatch build, because the tensor methods
// all need to call into the fastpath C++ API defined in CPUFunctions.h. The methods are also all
// directly inlined into TensorBody.h.
// - CPUFunctions.h #includes TensorBody.h because it contains function declarations for the entire C++ API,
// which include functions that have defaultable std::optional<Tensor> arguments.
// That requires knowing the full Tensor class definition.
//
// We break the cycle by doing the following:
// - Split out CPUFunctions.h into two files: CPUFunctions.h and CPUFunctions_inl.h
// - CPUFunctions.h is a dummy file that just includes the Tensor class and includes CPUFunctions_inl.h,
// - CPUFunctions_inl.h includes everything else
// - (only in the static dispatch build) TensorBody.h makes sure to finish defining the Tensor class,
// and then it includes CPUFunctions_inl.h.
// - All other files that want the cpu fastpath functions can include CPUFunctions.h directly.
// - This also means that in the static dispatch build, CPUFunctions.h only needs to
//   #include TensorBody.h, and it will automatically bring in CPUFunctions_inl.h.
${inline_headers}

@@ -0,0 +1,22 @@
#pragma once
// ${generated_comment}
// NB: The implementing C++ file is RegisterDispatchKey.cpp
// The only #includes we need are for custom classes that have defaults in the C++ API
#include <c10/core/MemoryFormat.h>
#include <c10/core/Scalar.h>
#include <ATen/core/Reduction.h>
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from \
<ATen/ops/{my_operator}_${dispatch_namespace}_dispatch.h>. \
See NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
${DispatchKeyFunctions_inl_includes}
${dispatch_namespaced_declarations}

@@ -0,0 +1,13 @@
// ${generated_comment}
${includes}
${native_functions_include}
namespace {
${helper_fns}
} // namespace
${namespace_prologue}
${native_function_definitions}
${namespace_epilogue}

@@ -0,0 +1,19 @@
#pragma once
// an external backend might generate files within its code tree
// and check all the source files within the tree with clang-format.
// so, disable it since the backend might have a different config.
// clang-format off
// ${generated_comment}
#include <ATen/Tensor.h>
${namespace_prologue}
struct ${class_name} {
${dispatch_declarations}
};
${namespace_epilogue}

@@ -0,0 +1,26 @@
#pragma once
// ${generated_comment}
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>
#include <ATen/TensorUtils.h>
#include <ATen/TracerMode.h>
#include <ATen/core/Generator.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
${static_dispatch_ops_headers}
${operator_includes}
namespace at {
${function_definitions}
}

@@ -0,0 +1,33 @@
#pragma once
// ${generated_comment}
#include <ATen/Tensor.h>
namespace at {
namespace functionalization {
enum class InverseReturnMode {
/// Specifies that functional inverses should always return a view.
AlwaysView,
/// Specifies that functional inverses should always return a non-view / copy.
NeverView,
/// Specifies that functional inverses should return a view unless a (copying) scatter
/// inverse exists, in which case that will be used instead.
/// This avoids as_strided() calls that can be difficult for subclasses to handle.
ViewOrScatterInverse,
};
struct FunctionalInverses {
${view_inverse_declarations}
// NB: These are not generated! They're manually implemented in the template.
// TODO: Change codegen to generate these. See the following link:
// https://github.com/pytorch/pytorch/blob/main/torchgen/model.py#L2583-L2585
static at::Tensor chunk_inverse(const at::Tensor & base, const at::Tensor & mutated_view, InverseReturnMode inverse_return_mode, int64_t mutated_view_idx, int chunks, int dim);
static at::Tensor narrow_inverse(const at::Tensor & base, const at::Tensor & mutated_view, InverseReturnMode inverse_return_mode, int dim, c10::SymInt start, c10::SymInt length);
};
}
}

@@ -0,0 +1,103 @@
#include <array>
#include <ATen/Functions.h>
#include <ATen/Utils.h>
#include <c10/core/Allocator.h>
namespace at {
Tensor TensorMaker::make_tensor() {
AutoDispatchBelowADInplaceOrView guard{}; // TODO: Remove.
tracer::impl::NoTracerDispatchMode tracer_guard{};
check_size_nonnegative(sizes_);
TORCH_CHECK_VALUE(
!deleter_ || !ctx_,
"The deleter and context arguments are mutually exclusive.");
if (device_ == std::nullopt) {
device_ = globalContext().getDeviceFromPtr(data_, opts_.device().type());
}
if (opts_.device().has_index()) {
// clang-format off
TORCH_CHECK_VALUE(
opts_.device() == *device_,
"Specified device ", opts_.device(), " does not match device of data ", *device_);
// clang-format on
}
std::size_t size_bytes = computeStorageSize();
DataPtr data_ptr{};
if (deleter_) {
data_ptr = makeDataPtrFromDeleter();
} else {
data_ptr = makeDataPtrFromContext();
}
TORCH_CHECK(!resizeable_ || allocator_ != nullptr, "Must specify an allocator with allocator() if you want to use resizeable_storage()");
Storage storage{Storage::use_byte_size_t{}, size_bytes, std::move(data_ptr), /*allocator=*/allocator_, /*resizable=*/resizeable_};
Tensor tensor = detail::make_tensor<TensorImpl>(
std::move(storage), opts_.computeDispatchKey(), opts_.dtype());
TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl();
if (strides_) {
tensor_impl->set_sizes_and_strides(sizes_, *strides_);
} else {
tensor_impl->set_sizes_contiguous(sizes_);
}
if (storage_offset_) {
tensor_impl->set_storage_offset(*storage_offset_);
}
return tensor;
}
std::size_t TensorMaker::computeStorageSize() const noexcept {
std::size_t itemsize = opts_.dtype().itemsize();
if (strides_) {
auto storage_size = detail::computeStorageNbytes(sizes_, *strides_, itemsize);
if (storage_offset_) {
storage_size += storage_offset_.value();
}
return storage_size;
}
std::size_t size = 1;
for (std::int64_t s : sizes_) {
size *= static_cast<std::size_t>(s);
}
auto storage_size = size * itemsize;
if (storage_offset_) {
storage_size += storage_offset_.value();
}
return storage_size;
}
inline DataPtr TensorMaker::makeDataPtrFromDeleter() noexcept {
return InefficientStdFunctionContext::makeDataPtr(data_, std::move(deleter_), *device_);
}
inline DataPtr TensorMaker::makeDataPtrFromContext() noexcept {
return DataPtr{data_, ctx_.release(), ctx_.get_deleter(), *device_};
}
IntArrayRef TensorMaker::makeTempSizes() const noexcept {
static std::int64_t zeros[5] = {0, 0, 0, 0, 0};
if (opts_.has_memory_format()) {
MemoryFormat format = *opts_.memory_format_opt();
if (format == MemoryFormat::ChannelsLast) {
return IntArrayRef(zeros, 4);
}
if (format == MemoryFormat::ChannelsLast3d) {
return IntArrayRef(zeros, 5);
}
}
return IntArrayRef(zeros, 1);
}
} // namespace at
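A minimal sketch of the public entry point that drives TensorMaker::make_tensor above (assuming the at::for_blob builder API; error handling omitted). No deleter or context is supplied, so the caller retains ownership of the buffer:

#include <ATen/Functions.h>
#include <vector>

at::Tensor wrap_buffer(std::vector<float>& buf) {
  // Wraps caller-owned memory without copying.
  return at::for_blob(buf.data(), {static_cast<int64_t>(buf.size())})
      .options(at::TensorOptions().dtype(at::kFloat))
      .make_tensor();
}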

@@ -0,0 +1,143 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}.h> and \
see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
// NOTE: [TORCH_ASSERT_ONLY_METHOD_OPERATORS]
//
// In ATen, certain generated headers files include the definitions of
// every single operator in PyTorch. Unfortunately this means every
// time an operator signature is updated or changed in
// native_functions.yaml, you (and every other PyTorch developer) need
// to recompile every source file that includes any of these headers.
//
// To break up these header dependencies, and improve incremental
// build times for all PyTorch developers, these headers are split
// into per-operator headers in the `ATen/ops` folder. This limits
// incremental builds to only changes to methods of `Tensor`, or files
// that use the specific operator being changed. With `at::sum` as an
// example, you should include
//
// <ATen/ops/sum.h> // instead of ATen/Functions.h
// <ATen/ops/sum_native.h> // instead of ATen/NativeFunctions.h
// <ATen/ops/sum_ops.h> // instead of ATen/Operators.h
// <ATen/ops/sum_cpu_dispatch.h> // instead of ATen/CPUFunctions.h
//
// However, even if you're careful to use this in your own code,
// `Functions.h` might still be included indirectly through another header
// without you realising. To avoid this, you can add
//
// #define TORCH_ASSERT_ONLY_METHOD_OPERATORS
//
// to the top of your source file. This way any time the non-specific
// headers are included, the compiler will error out.
//
// Also, be aware that `ops` are not available in all build
// configurations (namely fb-internal) so you must guard these
// includes with `#ifdef AT_PER_OPERATOR_HEADERS`. e.g.
//
// #ifndef AT_PER_OPERATOR_HEADERS
// #include <ATen/Functions.h>
// #else
// #include <ATen/ops/sum.h>
// #endif
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>
#include <ATen/TensorUtils.h>
#include <ATen/TracerMode.h>
#include <ATen/core/Generator.h>
#include <ATen/core/Reduction.h>
#include <c10/core/SymInt.h>
#include <ATen/core/Tensor.h>
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/util/OptionalArrayRef.h>
#include <ATen/ops/from_blob.h>
#include <ATen/ops/tensor.h>
${Functions_includes}
namespace at {
${Functions_declarations}
// Special C++ only overloads for std()-like functions (See gh-40287)
// These are needed because int -> bool conversion takes precedence over int -> IntArrayRef
// So, for example std(0) would select the std(unbiased=False) overload
TORCH_API inline Tensor var(const Tensor& self, int dim) {
return at::var(self, IntArrayRef{dim});
}
TORCH_API inline std::tuple<Tensor, Tensor> var_mean(const Tensor& self, int dim) {
return at::var_mean(self, IntArrayRef{dim});
}
TORCH_API inline Tensor std(const Tensor& self, int dim) {
return at::std(self, IntArrayRef{dim});
}
TORCH_API inline std::tuple<Tensor, Tensor> std_mean(const Tensor& self, int dim) {
return at::std_mean(self, IntArrayRef{dim});
}
inline int64_t numel(const Tensor& tensor) {
return tensor.numel();
}
inline int64_t size(const Tensor& tensor, int64_t dim) {
return tensor.size(dim);
}
inline int64_t stride(const Tensor& tensor, int64_t dim) {
return tensor.stride(dim);
}
inline bool is_complex(const Tensor& tensor) {
return tensor.is_complex();
}
inline bool is_floating_point(const Tensor& tensor) {
return tensor.is_floating_point();
}
inline bool is_signed(const Tensor& tensor) {
return tensor.is_signed();
}
inline bool is_inference(const Tensor& tensor) {
return tensor.is_inference();
}
inline bool _is_zerotensor(const Tensor& tensor) {
return tensor._is_zerotensor();
}
inline bool is_conj(const Tensor& tensor) {
return tensor.is_conj();
}
inline Tensor conj(const Tensor& tensor) {
return tensor.conj();
}
inline bool is_neg(const Tensor& tensor) {
return tensor.is_neg();
}
}
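A hedged sketch of the special overloads and free functions declared above (the values in comments assume a 3x4 input):

#include <ATen/Functions.h>

void example() {
  at::Tensor t = at::rand({3, 4});
  // An int second argument selects the dim-reduction variant rather than
  // being converted to the bool `unbiased` flag (see the gh-40287 note above).
  at::Tensor v = at::var(t, 0);
  int64_t n = at::numel(t);     // 12
  int64_t s0 = at::size(t, 0);  // 3
  (void)v; (void)n; (void)s0;
}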

@@ -0,0 +1,19 @@
#pragma once
// This file contains autogenerated LazyTensor IR nodes
${lazy_ir_sysinc}
${lazy_ir_inc}
${namespace_prologue}
using at::operator<<;
// kNullValue is used to contribute a static hash value any time
// a node has an Optional<Value> input that is nullopt. It is important
// to differentiate between HASH(std::nullopt, something) and HASH(something, std::nullopt),
// and using kNullValue in the hash function in the order of arguments
// serves this purpose.
static const torch::lazy::Value kNullValue = torch::lazy::Value();
${ir_declarations}
${namespace_epilogue}

@@ -0,0 +1,11 @@
#pragma once
${lazy_non_native_ir_inc}
// This file contains autogenerated LazyTensor Non Native IR nodes
${namespace_prologue}
${non_native_ir_nodes}
${namespace_epilogue}

@@ -0,0 +1,24 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>
${MethodOperators_includes}
namespace at {
namespace _ops {
${MethodOperators_declarations}
} // namespace _ops
} // namespace at

@@ -0,0 +1,17 @@
#pragma once
// ${generated_comment}
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <tuple>
#include <vector>
${extra_includes}
${native_function_declarations}

@@ -0,0 +1,33 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}_native.h> \
and see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <tuple>
#include <vector>
${NativeFunctions_includes}
${NativeFunctions_declarations}

@@ -0,0 +1,23 @@
#pragma once
// ${generated_comment}
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/TensorIterator.h>
#include <ATen/TensorMeta.h>
#include <tuple>
#include <vector>
namespace at {
namespace meta {
${meta_function_declarations}
} // namespace meta
} // namespace at

@@ -0,0 +1,19 @@
#pragma once
// ${generated_comment}
#include <ATen/core/Tensor.h>
#include <ATen/core/IListRef.h>
#include <ATen/TensorMeta.h>
#include <ATen/TensorIterator.h>
${NativeMetaFunctions_includes}
namespace at {
namespace meta {
${NativeMetaFunctions_declarations}
} // namespace meta
} // namespace at

@@ -0,0 +1,18 @@
#pragma once
// ${generated_comment}
#include <tuple>
#include <vector>
// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>
namespace at {
namespace _ops {
${declarations}
}} // namespace at::_ops

@@ -0,0 +1,19 @@
#include <ATen/Tensor.h>
#include <ATen/core/dispatch/Dispatcher.h>
// ${generated_comment}
// NOTE See [Sharded File] comment in VariableType
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
${operator_headers}
#endif
${static_dispatch_extra_headers}
namespace at { namespace _ops {
${definitions}
}} // namespace at::_ops

@@ -0,0 +1,74 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}_ops.h> \
and see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
#include <c10/core/SymInt.h>
#include <c10/core/SymIntArrayRef.h>
#include <c10/core/Scalar.h>
#include <c10/core/TensorOptions.h>
#include <c10/core/QScheme.h>
#include <c10/util/OptionalArrayRef.h>
#include <tuple>
#include <vector>
${Operators_includes}
// Extension writers: do you write wrapper functions? Are you frustrated with
// resolving overloads of operators? Are you frustrated with dealing with
// pointer-to-methods and resolving overloads of pointer-to-methods?? Look no
// further, this is the utility for you.
//
// Given an operator schema: aten::op.overload(...
//
// Use ATEN_FN2(op, overload) to get a *function* version of the operator
// that is guaranteed to not be overloaded. This means that you can safely
// decltype(&ATEN_FN2(op, overload)) it. NB: the 2 means this macro takes 2 args.
//
// Given an operator schema without an overload name: aten::op(...
//
// Use ATEN_FN(op) to get an unambiguous *function* version of the operator.
//
// There is some interesting behavior for out= operations.
// ATEN_FN2(sin, out) gives a function that is *faithful* to the schema;
// that is, the order of arguments is exactly what it looks like in the schema.
#define ATEN_FN2(op_name, overload) at::_ops::op_name##_##overload::call
#define ATEN_FN(op_name) at::_ops::op_name::call
// Separately, ATEN_OP(op) and ATEN_OP2(op, overload) define a class containing compile-time
// metadata about a given aten operator.
// Notable data on the class includes:
// - ATEN_OP2(add, Tensor)::name // returns the string name: "add"
// - ATEN_OP2(add, Tensor)::overload_name // returns the string overload name: "Tensor"
// - ATEN_OP2(add, Tensor)::schema // returns the C++ schema type: at::Tensor (const at::Tensor &, const at::Tensor &, const at::Scalar &)
// - ATEN_OP2(add, Tensor)::schema_str // returns the string jit type: "add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor"
#define ATEN_OP2(op_name, overload) at::_ops::op_name##_##overload
#define ATEN_OP(op_name) at::_ops::op_name
// WARNING: Please do not call any of the ops in the _ops namespace directly.
// Use the ATEN_FN macros. We do not guarantee stability of the naming
// scheme for the functions in at::_ops
// See Note [The ATen Operators API] for details of the at::_ops namespace
namespace at {
namespace _ops {
${Operators_declarations}
} // namespace _ops
} // namespace at
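A hedged usage sketch of the ATEN_FN2/ATEN_OP2 macros above (aten::sin.out and aten::add.Tensor are used as examples; exact metadata strings are not asserted here):

#include <ATen/core/Tensor.h>
#include <ATen/Operators.h>

void example(const at::Tensor& self, at::Tensor& out) {
  // ATEN_FN2 names a single, non-overloaded function, so its address can be taken.
  using sin_out_t = decltype(&ATEN_FN2(sin, out));
  sin_out_t f = &ATEN_FN2(sin, out);
  f(self, out);  // faithful to the schema: the out argument comes last
  // ATEN_OP2 names a struct carrying compile-time metadata for aten::add.Tensor.
  const char* overload = ATEN_OP2(add, Tensor)::overload_name;
  (void)overload;
}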

@@ -0,0 +1,15 @@
// ${generated_comment}
#include <ATen/RedispatchFunctions.h>
#include <ATen/Functions.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/core/op_registration/adaption.h>
namespace at {
namespace redispatch {
${function_redispatch_definitions}
} // namespace redispatch
} // namespace at

@@ -0,0 +1,32 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_ONLY_METHOD_OPERATORS
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider using the at::_ops::{name}::redispatch() interface by including \
the specific operator from <ATen/ops/{my_operator}_ops.h>
#endif
#include <c10/core/Scalar.h>
#include <ATen/Tensor.h>
#include <c10/core/Storage.h>
#include <ATen/core/Generator.h>
#include <c10/util/Deprecated.h>
#include <ATen/DeviceGuard.h>
#include <c10/core/TensorOptions.h>
#include <ATen/core/Reduction.h>
#include <optional>
#include <ATen/TensorUtils.h>
#include <ATen/Context.h>
#include <ATen/TracerMode.h>
#include <ATen/Operators.h>
namespace at {
namespace redispatch {
${function_redispatch_definitions}
} // namespace redispatch
}

@@ -0,0 +1,29 @@
// We register ops with a higher priority dispatch key (BackendSelect) than the usual backend-specific keys (e.g. CPU)
// which makes calls to the factory functions dispatch to here.
// We then 'manually' compute a lower-priority dispatch key to re-dispatch to (e.g. CPU) to get to the eventually correct backend.
// ${generated_comment}
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/core/dispatch/DispatchKeyExtractor.h>
#include <torch/library.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
${ops_headers}
#endif
namespace at {
namespace {
${backend_select_method_definitions}
TORCH_LIBRARY_IMPL(aten, BackendSelect, m) {
${backend_select_function_registrations};
}
} // namespace
} // at

@@ -0,0 +1,41 @@
#include <torch/csrc/jit/runtime/operator.h>
#include <torch/csrc/jit/runtime/custom_operator.h>
#include <torch/csrc/jit/runtime/register_ops_utils.h>
#include <ATen/UnboxingFunctions.h>
// ${generated_comment}
// NOTE [Sharded File]: This file is generated in a sharded fashion to speed up
// incremental rebuilds. See the comment at the top of
// templates/VariableType.cpp for an analogous, in-depth discussion.
//
// Generated by tools/jit/gen_unboxing.py. This file registers all ATen ops into JIT op registry instead of c10
// dispatcher. JIT op registry only takes boxed kernels, so we are calling unboxing functions in UnboxingFunctions.h
// to cast arguments into C++ types (instead of IValue) and delegate to unboxed kernels.
namespace torch { namespace jit {
using autograd::Variable;
using autograd::variable_list;
using at::Scalar;
using at::ScalarType;
using at::Tensor;
using at::TensorOptions;
using at::DeviceGuard;
using ::c10::fmap;
using ::c10::filter;
namespace {
RegisterOperators reg({
// Generated operators
${unboxed_ops}
});
} // anon namespace
}} // namespace torch::jit

@@ -0,0 +1,24 @@
${ns_prologue}
// NB: TORCH_LIBRARY_IMPL must be in an anonymous namespace to avoid
// ambiguity with conflicting identifiers that may have been defined in
// at namespace already.
namespace {
${dispatch_helpers}
${dispatch_anonymous_definitions}
${static_init_dispatch_registrations}
} // anonymous namespace
${deferred_dispatch_registrations}
namespace ${dispatch_namespace} {
${dispatch_namespaced_definitions}
} // namespace ${dispatch_namespace}
${ns_epilogue}

@@ -0,0 +1,54 @@
// required for old g++ to compile PRId64 macros, see
// https://github.com/pytorch/pytorch/issues/3571
// for context
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
// an external backend might generate files within its code tree
// and check all the source files within the tree with clang-format.
// so, disable it since the backend might have a different config.
// clang-format off
// NOTE: This condition is true for all PyTorch internal libraries, it
// just excludes external projects such as torch_xla which
// re-use some of the PyTorch codegen machinery.
#if defined(CAFFE2_BUILD_MAIN_LIB) || \
defined(TORCH_CUDA_BUILD_MAIN_LIB) || \
defined(TORCH_HIP_BUILD_MAIN_LIB) || \
defined(TORCH_CUDA_CU_BUILD_MAIN_LIB) || \
defined(TORCH_CUDA_CPP_BUILD_MAIN_LIB)
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#endif
// ${generated_comment}
#include <c10/core/TensorImpl.h>
#include <c10/core/Allocator.h>
#include <ATen/DeviceGuard.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/Utils.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/Dispatch.h>
#include <c10/util/ExclusivelyOwned.h>
#include <c10/util/Half.h>
#include <c10/core/UndefinedTensorImpl.h>
#include <optional>
#include <ATen/Tensor.h>
#include <ATen/native/Resize.h>
#include <cstddef>
#include <functional>
#include <memory>
#include <utility>
#include <ATen/Config.h>
#include <ATen/core/op_registration/adaption.h>
#include <torch/library.h>
$extra_cuda_headers
$external_backend_headers
$dispatch_headers
$ops_headers
// See template file RegisterDispatchDefinitions.ini
$dispatch_definitions

@@ -0,0 +1,110 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include <ATen/core/LegacyTypeDispatch.h>
#include <ATen/EmptyTensor.h>
#include <ATen/FunctionalTensorWrapper.h>
#include <ATen/FunctionalInverses.h>
#include <ATen/MemoryOverlap.h>
#include <torch/library.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#include <ATen/NativeFunctions.h>
#else
// needed for the meta tensor calls to get stride info in functionalization
#include <ATen/ops/empty_strided_native.h>
// needed for special handling of copy_().
// See Note [functionalizating copy_() and not preserving strides]
#include <ATen/ops/to_ops.h>
#include <ATen/ops/expand_copy_ops.h>
$ops_headers
#endif
namespace at {
namespace functionalization {
// This keyset is used by functionalization when it calls into meta kernels
// to accurately propagate stride metadata.
// Exclude any modes: the purpose of calling into meta kernels is only as an implementation
// detail to perform shape inference, and we don't want any modal keys to run.
// Specifically, we want to prevent functionalization and Python modes from running.
constexpr auto exclude_keys_for_meta_dispatch =
c10::functorch_transforms_ks |
c10::DispatchKeySet({
c10::DispatchKey::FuncTorchDynamicLayerBackMode,
c10::DispatchKey::FuncTorchDynamicLayerFrontMode,
c10::DispatchKey::Python,
c10::DispatchKey::PreDispatch,
});
// Helper around at::has_internal_overlap.
// The ATen util is used in hot-path eager mode: it's always fast,
// but might return TOO_HARD sometimes.
// During functionalization, we're ok taking a bit longer
// to detect memory overlap.
inline bool has_internal_overlap_helper(const at::Tensor t) {
auto has_overlap = at::has_internal_overlap(t);
if (has_overlap == at::MemOverlap::Yes) return true;
if (has_overlap == at::MemOverlap::No) return false;
return false;
}
inline Tensor to_meta(const Tensor& t) {
if (!t.defined()) return t;
return at::native::empty_strided_meta_symint(t.sym_sizes(), t.sym_strides(),
/*dtype=*/std::make_optional(t.scalar_type()), /*layout=*/std::make_optional(t.layout()),
/*device=*/std::make_optional(c10::Device(kMeta)), /*pin_memory=*/std::nullopt);
}
inline std::optional<Tensor> to_meta(const std::optional<Tensor>& t) {
if (t.has_value()) {
return std::make_optional<Tensor>(to_meta(*t));
}
return std::nullopt;
}
inline std::vector<Tensor> to_meta(at::ITensorListRef t_list) {
std::vector<Tensor> outputs;
outputs.reserve(t_list.size());
for (const auto& tensor : t_list) {
outputs.push_back(to_meta(tensor));
}
return outputs;
}
inline c10::List<Tensor> to_meta(const c10::List<Tensor>& t_list) {
c10::List<Tensor> outputs;
outputs.reserve(t_list.size());
for (const auto i : c10::irange(t_list.size())) {
outputs.push_back(to_meta(t_list[i]));
}
return outputs;
}
inline c10::List<::std::optional<Tensor>> to_meta(const c10::List<::std::optional<Tensor>>& t_list) {
c10::List<::std::optional<Tensor>> outputs;
outputs.reserve(t_list.size());
for (const auto i : c10::irange(t_list.size())) {
outputs.push_back(to_meta(t_list[i]));
}
return outputs;
}
${func_definitions}
} // namespace functionalization
namespace {
TORCH_LIBRARY_IMPL(aten, Functionalize, m) {
${func_registrations};
}
} // namespace
} // namespace at

@@ -0,0 +1,13 @@
// ${generated_comment}
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <torch/library.h>
namespace at {
TORCH_LIBRARY(aten, m) {
${aten_schema_registrations};
// Distributed Ops
// Implementations located in torch/csrc/jit/runtime/register_distributed_ops.cpp
m.def("get_gradients(int context_id) -> Dict(Tensor, Tensor)");
}
${schema_registrations}
} // namespace at

@@ -0,0 +1,4 @@
// This file contains all native_functions that can be registered to the dispatcher,
// along with the schema string that each should be registered with
${registration_declarations}

@@ -0,0 +1,753 @@
#pragma once
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#include <c10/core/Device.h>
#include <c10/core/Layout.h>
#include <c10/core/MemoryFormat.h>
#include <c10/core/QScheme.h>
#include <c10/core/Stream.h>
#include <c10/core/Scalar.h>
#include <c10/core/ScalarType.h>
#include <c10/core/ScalarTypeToTypeMeta.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorImpl.h>
#include <c10/core/UndefinedTensorImpl.h>
#include <c10/core/WrapDimMinimal.h>
#include <c10/util/Exception.h>
#include <c10/util/ExclusivelyOwned.h>
#include <c10/util/Deprecated.h>
#include <c10/util/MaybeOwned.h>
#include <optional>
#include <c10/util/OptionalArrayRef.h>
#include <c10/util/intrusive_ptr.h>
#include <c10/macros/Export.h>
#include <ATen/core/CheckMemoryFormat.h>
#include <ATen/core/DeprecatedTypePropertiesRegistry.h>
#include <ATen/core/DeprecatedTypeProperties.h>
#include <ATen/core/NamedTensor.h>
#include <ATen/core/QuantizerBase.h>
#include <c10/core/SymInt.h>
#include <ATen/core/TensorAccessor.h>
#include <ATen/core/TensorBase.h>
#include <ATen/MethodOperators.h>
namespace c10 {
template<class T> class List;
template<class T> class IListRef;
}
namespace at {
struct Generator;
struct Type;
class DeprecatedTypeProperties;
class Tensor;
} // namespace at
namespace at {
namespace indexing {
struct TensorIndex;
} // namespace indexing
} // namespace at
namespace torch { namespace autograd {
struct Node;
}} // namespace torch::autograd
namespace at {
class OptionalTensorRef;
class TensorRef;
class Tensor;
using TensorList = ArrayRef<Tensor>;
using ITensorList = c10::IListRef<Tensor>;
using Stream = c10::Stream;
// Tensor is a "generic" object holding a pointer to the underlying TensorImpl object, which
// has an embedded reference count. In this way, Tensor is similar to boost::intrusive_ptr.
//
// For example:
//
// void func(Tensor a) {
// Tensor b = a;
// ...
// }
//
// In this example, when we say Tensor b = a, we are creating a new object that points to the
// same underlying TensorImpl, and bumps its reference count. When b goes out of scope, the
// destructor decrements the reference count by calling release() on the TensorImpl it points to.
// The existing constructors, operator overloads, etc. take care to implement the correct semantics.
//
// Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and
// special care must be taken to handle this.
class TORCH_API Tensor: public TensorBase {
protected:
// Create a Tensor with a +0 reference count. Special care must be
// taken to avoid decrementing this reference count at destruction
// time. Intended to support MaybeOwnedTraits<Tensor>.
explicit Tensor(unsafe_borrow_t, const TensorBase& rhs): TensorBase(unsafe_borrow_t{}, rhs) {}
friend MaybeOwnedTraits<Tensor>;
friend OptionalTensorRef;
friend TensorRef;
public:
Tensor() = default;
// This constructor should not be used by end users and is an implementation
// detail invoked by autogenerated code.
explicit Tensor(
c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
: TensorBase(std::move(tensor_impl)) {}
Tensor(const Tensor &tensor) = default;
Tensor(Tensor &&tensor) = default;
// Implicitly move-constructible from TensorBase, but must be explicit to increase refcount
explicit Tensor(const TensorBase &base): TensorBase(base) {}
/*implicit*/ Tensor(TensorBase &&base): TensorBase(std::move(base)) {}
// Creates a new wrapper from TensorImpl. Intentionally a free method because
// it should be used with care. Checks necessary invariants
static Tensor wrap_tensor_impl(
c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl) {
return TensorBase::wrap_tensor_impl(std::move(tensor_impl));
}
Tensor contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const {
return TensorBase::contiguous(memory_format);
}
Tensor conj() const {
if (!this->is_complex()) {
return *this;
}
switch (this->layout()) {
case at::kSparse:
case at::kSparseCsr:
case at::kSparseCsc:
case at::kSparseBsr:
case at::kSparseBsc:
return this->conj_physical();
default:
return this->_conj();
}
}
// Aliased by Dimname overloads, so need explicit using
using TensorBase::size;
using TensorBase::sym_size;
using TensorBase::stride;
/// Should be used if *this can reasonably be expected to be contiguous and
/// performance is important.
/// Compared to contiguous, it saves a reference count
/// increment/decrement if *this is already contiguous, at the cost
/// in all cases of an extra pointer of stack usage, an extra branch
/// to access, and an extra branch at destruction time.
c10::MaybeOwned<Tensor> expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const &;
// Use .contiguous() instead. Trying to borrow from a prvalue Tensor
// will only lead to trouble and dangling references.
c10::MaybeOwned<Tensor> expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) && = delete;
// The following overloads are very intriguing. Consider the following
// program:
//
// x[1] = 3;
//
// We would expect that the first entry of x is written to 3. But how can we
// actually achieve this? x[1] evaluates to a tensor...
//
// The answer is, using a ref-qualifier. x[1] is an rvalue, which cannot be
// (profitably) assigned to in the traditional sense, so we overload
// assignment to mean, "Actually, copy 3 into the tensor data." This is done
// with an rvalue-reference ref-qualified overload (the methods with && at the
// end of their type.)
//
// There's one more fly in the ointment: We also want
//
// Tensor x = y;
//
// to work, and we want it NOT to copy. So we need a traditional operator=
// overload. But we MUST specify a mutable lvalue ref-qualifier, to
// disambiguate the traditional overload from the rvalue-reference
// ref-qualified overload. Otherwise, it will be ambiguous, because
// a non ref-qualified method is eligible for all situations.
// Unfortunately, we have to write these constructors out manually
// to work around an MSVC bug:
// error C2580: 'at::Tensor &at::Tensor::operator =(const at::Tensor &) &':
// multiple versions of a defaulted special member functions are not allowed
// Tensor& operator=(const Tensor&) & = default;
// Tensor& operator=(Tensor&&) & = default;
// Also MSVC will wrongly issue the following warning with the aforementioned fix
// warning C4522: 'at::Tensor': multiple assignment operators specified
// Let's just skip the warning.
//
// TODO: temporarily disabled
Tensor& operator=(const TensorBase& x) & {
impl_ = x.getIntrusivePtr();
return *this;
}
Tensor& operator=(TensorBase&& x) & noexcept {
impl_ = x.unsafeReleaseIntrusivePtr();
return *this;
}
Tensor& operator=(const Tensor &x) & {
return operator=(static_cast<const TensorBase&>(x));
}
Tensor& operator=(Tensor &&x) & noexcept {
return operator=(static_cast<TensorBase&&>(x));
}
Tensor& operator=(const Scalar &v) && {
return fill_(v);
}
Tensor& operator=(const Tensor &rhs) && {
return copy_(rhs);
}
Tensor& operator=(Tensor&& rhs) && {
return copy_(rhs);
}
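// Illustrative sketch of the ref-qualified overloads above (illustrative only,
// not part of the generated declarations):
//   Tensor x = at::zeros({3});
//   x[1] = 3;   // x[1] is an rvalue, so the && Scalar overload fills element 1 with 3
//   Tensor y;
//   y = x;      // y is an lvalue, so the & overload rebinds y to x's TensorImpl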
C10_DEPRECATED_MESSAGE("Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device().")
DeprecatedTypeProperties & type() const {
return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
dispatchKeyToBackend(legacyExtractDispatchKey(key_set())),
scalar_type());
}
Tensor toType(ScalarType t) const {
return to(options().dtype(t), /*non_blocking*/ false, /*copy*/ false);
}
// TODO: Deprecate me
Tensor toBackend(Backend b) const {
return to(options().device(backendToDeviceType(b)).layout(layout_from_backend(b)), /*non_blocking*/ false, /*copy*/ false);
}
C10_DEPRECATED_MESSAGE("Tensor.is_variable() is deprecated; everything is a variable now. (If you want to assert that variable has been appropriately handled already, use at::impl::variable_excluded_from_dispatch())")
bool is_variable() const noexcept {
return !at::impl::variable_excluded_from_dispatch();
}
template<typename T>
C10_DEPRECATED_MESSAGE("Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead.")
T * data() const {
return data_ptr<T>();
}
template <typename T>
T item() const;
template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead")
GenericPackedTensorAccessor<T,N,PtrTraits,index_t> packed_accessor() const & {
return generic_packed_accessor<T,N,PtrTraits,index_t>();
}
template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead")
GenericPackedTensorAccessor<T,N,PtrTraits,index_t> packed_accessor() && = delete;
Tensor operator~() const {
return bitwise_not();
}
Tensor operator-() const {
return neg();
}
Tensor& operator+=(const Tensor & other) {
return add_(other);
}
Tensor& operator+=(const Scalar & other) {
return add_(other);
}
Tensor& operator-=(const Tensor & other) {
return sub_(other);
}
Tensor& operator-=(const Scalar & other) {
return sub_(other);
}
Tensor& operator*=(const Tensor & other) {
return mul_(other);
}
Tensor& operator*=(const Scalar & other) {
return mul_(other);
}
Tensor& operator/=(const Tensor & other) {
return div_(other);
}
Tensor& operator/=(const Scalar & other) {
return div_(other);
}
Tensor& operator&=(const Tensor & other) {
return bitwise_and_(other);
}
Tensor& operator|=(const Tensor & other) {
return bitwise_or_(other);
}
Tensor& operator^=(const Tensor & other) {
return bitwise_xor_(other);
}
Tensor operator[](const Scalar & index) const {
if (!index.isIntegral(false)) {
TORCH_CHECK_INDEX(false, "Can only index tensors with integral scalars");
}
return this->operator[](index.toLong());
}
Tensor operator[](const Tensor & index) const {
// These properties are checked in the Scalar constructor, but we also
// check them here to provide more useful diagnostics for the user.
if (!index.defined()) {
TORCH_CHECK_INDEX(false, "Can only index with tensors that are defined");
}
if (index.dim() != 0) {
TORCH_CHECK_INDEX(false,
"Can only index with tensors that are scalars (zero-dim)");
}
// The Scalar(Tensor) constructor is explicit, so we need to call it.
return this->operator[](index.item());
}
Tensor operator[](int64_t index) const {
return select(0, index);
}
Tensor index(ArrayRef<at::indexing::TensorIndex> indices) const;
Tensor index(std::initializer_list<at::indexing::TensorIndex> indices) const;
Tensor & index_put_(ArrayRef<at::indexing::TensorIndex> indices, Tensor const & rhs);
Tensor & index_put_(ArrayRef<at::indexing::TensorIndex> indices, const Scalar& v);
Tensor & index_put_(std::initializer_list<at::indexing::TensorIndex> indices, Tensor const & rhs);
Tensor & index_put_(std::initializer_list<at::indexing::TensorIndex> indices, const Scalar& v);
Tensor cpu() const {
return to(options().device(c10::DeviceType::CPU), /*non_blocking*/ false, /*copy*/ false);
}
// TODO: The Python version also accepts arguments
Tensor cuda() const {
return to(options().device(c10::DeviceType::CUDA), /*non_blocking*/ false, /*copy*/ false);
}
Tensor hip() const {
return to(options().device(c10::DeviceType::HIP), /*non_blocking*/ false, /*copy*/ false);
}
Tensor ve() const {
return to(options().device(c10::DeviceType::VE), /*non_blocking*/ false, /*copy*/ false);
}
Tensor vulkan() const {
return to(options().device(c10::DeviceType::Vulkan), /*non_blocking*/ false, /*copy*/ false);
}
Tensor metal() const {
return to(options().device(c10::DeviceType::Metal), /*non_blocking*/ false, /*copy*/ false);
}
Tensor meta() const {
return to(options().device(c10::DeviceType::Meta), /*non_blocking*/ false, /*copy*/ false);
}
// ~~~~~ Autograd API ~~~~~
/// \fn bool is_leaf() const;
///
/// All Tensors that have `requires_grad()` which is ``false`` will be leaf Tensors by convention.
///
/// For Tensors that have `requires_grad()` which is ``true``, they will be leaf Tensors if they were
/// created by the user. This means that they are not the result of an operation and so
/// `grad_fn()` is `nullptr`.
///
/// Only leaf Tensors will have their `grad()` populated during a call to `backward()`.
/// To get `grad()` populated for non-leaf Tensors, you can use `retain_grad()`.
///
/// Example:
/// @code
/// auto a = torch::rand(10, torch::requires_grad());
/// std::cout << a.is_leaf() << std::endl; // prints `true`
///
/// auto b = torch::rand(10, torch::requires_grad()).to(torch::kCUDA);
/// std::cout << b.is_leaf() << std::endl; // prints `false`
/// // b was created by the operation that cast a cpu Tensor into a cuda Tensor
///
/// auto c = torch::rand(10, torch::requires_grad()) + 2;
/// std::cout << c.is_leaf() << std::endl; // prints `false`
/// // c was created by the addition operation
///
/// auto d = torch::rand(10).cuda();
/// std::cout << d.is_leaf() << std::endl; // prints `true`
/// // d does not require gradients and so has no operation creating it (that is tracked by the autograd engine)
///
/// auto e = torch::rand(10).cuda().requires_grad_();
/// std::cout << e.is_leaf() << std::endl; // prints `true`
/// // e requires gradients and has no operations creating it
///
/// auto f = torch::rand(10, torch::device(torch::kCUDA).requires_grad(true));
/// std::cout << f.is_leaf() << std::endl; // prints `true`
/// // f requires grad, has no operation creating it
/// @endcode
/// \fn void backward(const Tensor & gradient={}, std::optional<bool> retain_graph=std::nullopt, bool create_graph=false, std::optional<TensorList> inputs=std::nullopt) const;
///
/// Computes the gradient of current tensor with respect to graph leaves.
///
/// The graph is differentiated using the chain rule. If the tensor is
/// non-scalar (i.e. its data has more than one element) and requires
/// gradient, the function additionally requires specifying ``gradient``.
/// It should be a tensor of matching type and location, that contains
/// the gradient of the differentiated function w.r.t. this Tensor.
///
/// This function accumulates gradients in the leaves - you might need to
/// zero them before calling it.
///
/// \param gradient Gradient w.r.t. the
/// tensor. If it is a tensor, it will be automatically converted
/// to a Tensor that does not require grad unless ``create_graph`` is True.
/// None values can be specified for scalar Tensors or ones that
/// don't require grad. If a None value would be acceptable then
/// this argument is optional.
/// \param retain_graph If ``false``, the graph used to compute
/// the grads will be freed. Note that in nearly all cases setting
/// this option to True is not needed and often can be worked around
/// in a much more efficient way. Defaults to the value of
/// ``create_graph``.
/// \param create_graph If ``true``, graph of the derivative will
/// be constructed, allowing to compute higher order derivative
/// products. Defaults to ``false``.
/// \param inputs Inputs w.r.t. which the gradient will be accumulated into
/// ``at::Tensor::grad``. All other Tensors will be ignored. If not
/// provided, the gradient is accumulated into all the leaf Tensors
/// that were used to compute the current tensor.
/// When inputs are provided and a given input is not a leaf,
/// the current implementation will call its grad_fn (even though it is not strictly needed to get these gradients).
/// It is an implementation detail on which the user should not rely.
/// See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.
void backward(const Tensor & gradient={}, std::optional<bool> retain_graph=std::nullopt, bool create_graph=false, std::optional<TensorList> inputs=std::nullopt) const {
// NB: Adding this wrapper to _backward here because we'd like our
// 'backwards' api to accept the 'inputs' argument optionally. Since code gen
// currently does not support optional of TensorList our approach is to replace
// backward in native_functions.yaml with _backward and call it here instead.
if (inputs.has_value()) {
TORCH_CHECK(inputs.value().size() > 0, "'inputs' argument to backward cannot be empty")
this->_backward(inputs.value(), gradient, retain_graph, create_graph);
} else {
this->_backward({}, gradient, retain_graph, create_graph);
}
}
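// Hedged usage sketch (illustrative only, not part of the generated declarations):
//   auto x = torch::rand({2, 2}, torch::requires_grad());
//   auto y = (x * x).sum();
//   y.backward();                      // scalar output: no explicit gradient needed
//   auto z = x * x;
//   z.backward(torch::ones_like(z));   // non-scalar output: pass a gradient of matching shape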
/// \fn Tensor detach() const;
///
/// Returns a new Tensor, detached from the current graph.
/// The result will never require gradient.
/// \fn Tensor & detach_() const;
///
/// Detaches the Tensor from the graph that created it, making it a leaf.
/// Views cannot be detached in-place.
/// \fn void retain_grad() const;
///
/// Enables this Tensor to have its :attr:`grad` populated during
/// :func:`backward`. This is a no-op for leaf tensors.
/// \fn bool retains_grad() const;
///
/// Is ``true`` if this Tensor is non-leaf and its :attr:`grad` is enabled to be
/// populated during :func:`backward`, ``false`` otherwise.
const Tensor& set_requires_grad(bool requires_grad) const {
TensorBase::set_requires_grad(requires_grad);
return *this;
}
/// Return a mutable reference to the gradient. This is conventionally
/// used as `t.grad() = x` to set a gradient to a completely new tensor.
/// Note that this function works with a non-const Tensor and is not
/// thread safe.
Tensor& mutable_grad() const {
return impl_->mutable_grad();
}
/// This function returns an undefined tensor by default and returns a defined tensor
/// the first time a call to `backward()` computes gradients for this Tensor.
/// The attribute will then contain the gradients computed and future calls
/// to `backward()` will accumulate (add) gradients into it.
const Tensor& grad() const {
const Tensor& maybe_grad = impl_->grad();
if (!is_leaf() && !retains_grad() && !maybe_grad.defined()) {
TORCH_WARN(
"The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad "
"attribute won't be populated during autograd.backward(). If you indeed want the .grad "
"field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. "
"If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor "
"instead. See github.com/pytorch/pytorch/pull/30531 for more informations.");
}
return maybe_grad;
}
// The Forward AD API functions below are low level and are not to be used by end
// users who should use the API provided in torch/csrc/autograd.h
/// This function returns the forward gradient for this Tensor at the given level.
const Tensor& _fw_grad(uint64_t level) const {
return impl_->_fw_grad(level, *this);
}
/// This function can be used to set the value of the forward grad.
/// Note that the given new_grad might not be used directly if it has different
/// metadata (size/stride/storage offset) compared to this Tensor. In that case,
/// new_grad content will be copied into a new Tensor
void _set_fw_grad(const TensorBase& new_grad, uint64_t level, bool is_inplace_op) const {
impl_->_set_fw_grad(new_grad, *this, level, is_inplace_op);
}
// STOP. Thinking of adding a method here, which only makes use
// of other ATen methods? Define it in native_functions.yaml.
// example:
// Tensor* add(Tensor& b);
${tensor_method_declarations}
// Special C++ only overloads for std()-like functions (See gh-40287)
// These are needed because int -> bool conversion takes precedence over int -> IntArrayRef
// So, for example std(0) would select the std(unbiased=False) overload
Tensor var(int dim) const {
return var(IntArrayRef{dim});
}
Tensor std(int dim) const {
return std(IntArrayRef{dim});
}
// We changed .dtype() to return a TypeMeta in #12766. Ideally, we want the
// at::kDouble and its friends to be TypeMeta's, but that hasn't happened yet.
// Until that happens, we provide this method to maintain BC for C++ usage like
// `x.to(y.dtype)`.
// TODO: remove following two after at::kDouble and its friends are TypeMeta's.
inline Tensor to(caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
return this->to(/*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
}
inline Tensor to(Device device, caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
return this->to(device, /*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
}
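// Convenience helper that invokes a callable as `func(*this, params...)`.
// A minimal usage sketch (the lambda is illustrative):
//   auto doubled = t.m([](const Tensor& self, double a) { return self * a; }, 2.0);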
template <typename F, typename... Args>
decltype(auto) m(F func, Args&&... params) const {
return func(*this, std::forward<Args>(params)...);
}
/// NOTE: This is similar to the legacy `.data()` function on `Variable`, and is intended
/// to be used from functions that need to access the `Variable`'s equivalent `Tensor`
/// (i.e. `Tensor` that shares the same storage and tensor metadata with the `Variable`).
///
/// One notable difference with the legacy `.data()` function is that changes to the
/// returned `Tensor`'s tensor metadata (e.g. sizes / strides / storage / storage_offset)
/// will not update the original `Variable`, due to the fact that this function
/// shallow-copies the `Variable`'s underlying TensorImpl.
at::Tensor tensor_data() const {
return TensorBase::tensor_data();
}
/// NOTE: `var.variable_data()` in C++ has the same semantics as `tensor.data`
/// in Python, which creates a new `Variable` that shares the same storage and
/// tensor metadata with the original `Variable`, but with a completely new
/// autograd history.
///
/// NOTE: If we change the tensor metadata (e.g. sizes / strides /
/// storage / storage_offset) of a variable created from `var.variable_data()`, those
/// changes will not update the original variable `var`. In `.variable_data()`, we set
/// `allow_tensor_metadata_change_` to false to make such changes explicitly illegal,
/// in order to prevent users from changing metadata of `var.variable_data()`
/// and expecting the original variable `var` to also be updated.
at::Tensor variable_data() const {
return TensorBase::variable_data();
}
// Hooks
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
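// SFINAE helpers that select the register_hook() overload below depending on
// whether the callable returns void or a new Tensor; both overloads return an
// unsigned index that can later be passed to remove_hook().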
template <typename T>
using hook_return_void_t = std::enable_if_t<std::is_void<typename std::invoke_result_t<T&, Tensor>>::value, unsigned>;
template <typename T>
using hook_return_var_t = std::enable_if_t<std::is_same<typename std::invoke_result_t<T&, Tensor>, Tensor>::value, unsigned>;
/// Registers a backward hook.
///
/// The hook will be called every time a gradient with respect to the Tensor is computed.
/// The hook should have one of the following signatures:
/// ```
/// hook(Tensor grad) -> Tensor
/// ```
/// ```
/// hook(Tensor grad) -> void
/// ```
/// The hook should not modify its argument, but it can optionally return a new gradient
/// which will be used in place of `grad`.
///
/// This function returns the index of the hook in the list, which can be used to remove the hook.
///
/// Example:
/// @code
/// auto v = torch::tensor({0., 0., 0.}, torch::requires_grad());
/// auto h = v.register_hook([](torch::Tensor grad){ return grad * 2; }); // double the gradient
/// v.backward(torch::tensor({1., 2., 3.}));
/// // This prints:
/// // ```
/// // 2
/// // 4
/// // 6
/// // [ CPUFloatType{3} ]
/// // ```
/// std::cout << v.grad() << std::endl;
/// v.remove_hook(h); // removes the hook
/// @endcode
template <typename T>
hook_return_void_t<T> register_hook(T&& hook) const;
template <typename T>
hook_return_var_t<T> register_hook(T&& hook) const;
// Variable methods
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tensor data() const {
return TensorBase::data();
}
void _backward(TensorList inputs, const std::optional<Tensor>& gradient, std::optional<bool> keep_graph, bool create_graph) const;
const Tensor& requires_grad_(bool _requires_grad=true) const {
TensorBase::requires_grad_(_requires_grad);
return *this;
}
};
namespace detail {
// Helper creator for the Tensor class which doesn't require the user to pass
// in an intrusive_ptr; instead it just converts the arguments passed to the
// requested intrusive_ptr type.
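// A hypothetical call (the TensorImpl subclass and its constructor arguments
// are illustrative only):
//   auto t = at::detail::make_tensor<MyTensorImpl>(storage, key_set, dtype);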
template <typename T, typename... Args>
Tensor make_tensor(Args&&... args) {
return Tensor(c10::make_intrusive<T>(std::forward<Args>(args)...));
}
} // namespace detail
} // namespace at
namespace at {
${tensor_method_definitions}
} // namespace at
namespace c10 {
template <>
struct MaybeOwnedTraits<at::Tensor> {
using owned_type = at::Tensor;
using borrow_type = at::Tensor;
static borrow_type createBorrow(const owned_type& from) {
// NOTE: this can be implemented without the special
// unsafe_borrow_t Tensor constructor as
//
// return borrow_type(c10::intrusive_ptr<at::TensorImpl, at::UndefinedTensorImpl>::reclaim(from.unsafeGetTensorImpl()));
//
// but that hurts inlining due to the nullptr check in the
// Tensor(c10::intrusive_ptr<...>) constructor. We already know
// that from.impl_ isn't null because from is a valid Tensor, so
// we needn't do the check again. (using __builtin_assume can
// avoid this, but wouldn't be portable to MSVC.)
return borrow_type(borrow_type::unsafe_borrow_t{}, from);
}
static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) {
lhs.unsafeReleaseTensorImpl();
// See above note: this can be implemented with public API
// similarly to createBorrow(), but that would hurt inlining.
lhs = borrow_type(borrow_type::unsafe_borrow_t{}, rhs);
}
static void destroyBorrow(borrow_type& toDestroy) {
toDestroy.unsafeReleaseTensorImpl(); // "leak" it, but it was already +0.
}
static const owned_type& referenceFromBorrow(const borrow_type& borrow) {
return borrow;
}
static const owned_type* pointerFromBorrow(const borrow_type& borrow) {
return &borrow;
}
static bool debugBorrowIsValid(const borrow_type& /*borrow*/) {
return true;
}
};
template <>
struct ExclusivelyOwnedTraits<at::Tensor> {
using repr_type = at::Tensor;
using pointer_type = at::Tensor*;
using const_pointer_type = const at::Tensor*;
static repr_type nullRepr() {
return at::Tensor();
}
template <class... Args>
static repr_type createInPlace(Args&&... args) {
return at::Tensor(std::forward<Args>(args)...);
}
static repr_type moveToRepr(at::Tensor&& x) {
return std::move(x);
}
static void destroyOwned(at::Tensor& x) {
return ExclusivelyOwnedTraits<at::TensorBase>::destroyOwned(x);
}
static at::Tensor take(at::Tensor& x) {
return std::move(x);
}
static pointer_type getImpl(repr_type& x) {
return &x;
}
static const_pointer_type getImpl(const repr_type& x) {
return &x;
}
};
} // namespace c10
namespace at {
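// Returns a borrowed MaybeOwned wrapping *opt when the optional is engaged, and
// an owned, default-constructed (undefined) Tensor otherwise. A minimal usage
// sketch (names are illustrative):
//   c10::MaybeOwned<Tensor> bias = at::borrow_from_optional_tensor(opt_bias);
//   if (bias->defined()) { /* use *bias without an extra refcount bump */ }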
inline c10::MaybeOwned<Tensor> borrow_from_optional_tensor(
const std::optional<Tensor>& opt) {
return opt.has_value()
? c10::MaybeOwned<Tensor>::borrowed(*opt)
: c10::MaybeOwned<Tensor>::owned(std::in_place);
}
inline c10::MaybeOwned<Tensor> Tensor::expect_contiguous(MemoryFormat memory_format) const & {
if (is_contiguous(memory_format)) {
return c10::MaybeOwned<Tensor>::borrowed(*this);
} else {
return c10::MaybeOwned<Tensor>::owned(__dispatch_contiguous(memory_format));
}
}
} // namespace at

View File

@ -0,0 +1,61 @@
#include <c10/core/Scalar.h>
#include <ATen/core/TensorBody.h>
#include <c10/util/string_view.h>
namespace at {
namespace {
// Verifies the requested type is the same as the Tensor's type.
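// For quantized tensors the check also succeeds when the requested type matches
// the underlying representation (e.g. accessing kQInt8 data as int8_t).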
void check_type(const TensorBase& tensor, ScalarType type, c10::string_view type_name) {
TORCH_CHECK(
tensor.scalar_type() == type
|| (isQIntType(tensor.scalar_type())
&& toUnderlying(tensor.scalar_type()) == type),
"expected scalar type ", type_name, " but found ", tensor.scalar_type());
}
} // namespace
#define DEFINE_CAST(T, name) \
template <> \
TORCH_API const T* TensorBase::const_data_ptr() const { \
check_type(*this, ScalarType::name, #name); \
return this->unsafeGetTensorImpl()->data_ptr_impl<T>(); \
} \
\
template <> \
TORCH_API const T* TensorBase::const_data_ptr<const T>() const { \
check_type(*this, ScalarType::name, #name); \
return this->unsafeGetTensorImpl()->data_ptr_impl<std::remove_const_t<T>>(); \
} \
\
template <> \
TORCH_API T* TensorBase::mutable_data_ptr() const { \
check_type(*this, ScalarType::name, #name); \
return this->unsafeGetTensorImpl()->mutable_data_ptr_impl<T>(); \
} \
\
template <> \
TORCH_API T* TensorBase::data_ptr() const { \
return mutable_data_ptr<T>(); \
}
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_CAST)
AT_FORALL_QINT_TYPES(DEFINE_CAST)
DEFINE_CAST(uint16_t, UInt16)
DEFINE_CAST(uint32_t, UInt32)
DEFINE_CAST(uint64_t, UInt64)
#undef DEFINE_CAST
#define DEFINE_ITEM(T, name) \
template <> \
TORCH_API T Tensor::item() const { \
return item().to##name(); \
}
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_ITEM)
#undef DEFINE_ITEM
} //namespace at

View File

@ -0,0 +1,19 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/native/DispatchStub.h>
#include <ATen/TensorIterator.h>
#include <ATen/TensorMeta.h>
namespace at {
// NB: this is explicitly copied here (via codegen) rather than
// included via NativeFunctions.h to avoid recompiling this file when
// NativeFunctions.h changes
namespace meta {
${meta_declaration}
}
namespace native {
${native_declaration}
${native_definitions}
}} // namespace at::native

View File

@ -0,0 +1,14 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/native/ufunc/${name}.h>
#include <ATen/native/DispatchStub.h>
#include <ATen/TensorIterator.h>
#include <ATen/native/cpu/Loops.h>
#include <ATen/cpu/vec/vec.h>
#include <ATen/Dispatch.h>
#include <c10/core/Scalar.h>
namespace at {
namespace native {
${native_definitions}
}} // namespace at::native

View File

@ -0,0 +1,21 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/native/ufunc/${name}.h>
#include <ATen/Dispatch.h>
#include <ATen/native/DispatchStub.h>
#include <c10/core/Scalar.h>
${cuda_headers}
namespace at {
// NB: this is explicitly copied here (via codegen) rather than
// included via NativeFunctions.h to avoid recompiling this file when
// NativeFunctions.h changes
namespace meta {
${meta_declaration}
}
namespace native {
${native_declaration}
${native_definitions}
}} // namespace at::native

View File

@ -0,0 +1,35 @@
#include <ATen/UnboxingFunctions.h>
#include <ATen/Functions.h>
#include <ATen/Tensor.h>
#include <ATen/core/functional.h>
#include <ATen/core/interned_strings.h>
#include <ATen/core/ivalue.h>
#include <ATen/core/stack.h>
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstring>
#include <sstream>
#include <stdexcept>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
namespace at {
namespace unboxing {
using ::c10::fmap;
using ::c10::filter;
using torch::jit::peek;
using torch::jit::drop;
using torch::jit::pack;
using torch::jit::pop;
// Generated function declaration
${definitions}
} // namespace unboxing
} // namespace at

View File

@ -0,0 +1,32 @@
// ${generated_comment}
// Generated by tools/jit/gen_unboxing.py. This file declares code-generated boxed C++ functions for operators,
// based off of native_functions.yaml (or a similar yaml file with the same syntax). The definition of such a boxed
// function pops IValues off the stack and then converts them into the correct C++ types based on the given schema. This
// unboxing logic is an alternative to template-based metaprogramming unboxing.
#pragma once
#include <ATen/ATen.h>
namespace at {
namespace unboxing {
namespace {
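// Converts a c10::List of IValues whose length is statically known to be N into
// a std::array<T, N>, e.g. (illustratively) unboxing an `int[2]` schema argument
// into a std::array<int64_t, 2>.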
template<typename T, size_t N>
std::array<T, N> as_array(const c10::List<c10::IValue>& list) {
std::array<T, N> res;
AT_ASSERT(list.size() == N);
std::vector<T> vec;
for (c10::IValue elem : list) {
vec.push_back(elem.to<T>());
}
std::copy(vec.begin(), vec.end(), res.begin());
return res;
}
} // namespace <anonymous>
using Stack = std::vector<c10::IValue>;
// Generated function declaration
${declarations}
} // namespace unboxing
} // namespace at

View File

@ -0,0 +1,22 @@
#pragma once
// ${generated_comment}
#if defined(TORCH_ASSERT_NO_OPERATORS) || defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if including <ATen/core/symbol.h> for \
the c10::Symbol class would be sufficient, or if your change would be \
better placed in another file.
#endif
// ATen symbols correspond exactly to operators defined in ATen. Every
// symbol here corresponds exactly to an ATen operation defined in
// native_functions.yaml; attributes are in one-to-one correspondence
// with their ATen name.
#define FORALL_ATEN_BASE_SYMBOLS(_) \
${aten_symbols}
#define FORALL_ATTR_BASE_SYMBOLS(_) \
${attr_symbols}

View File

@ -0,0 +1,10 @@
#pragma once
// ${generated_comment}
namespace at {
// Enum of valid tags obtained from the entries in tags.yaml
enum class Tag {
${enum_of_valid_tags}
};
}

View File

@ -0,0 +1,4 @@
load("//:tools/bazel.bzl", "rules")
load(":build.bzl", "define_targets")
define_targets(rules = rules)

View File

@ -0,0 +1,3 @@
If you add a file to this directory, you **MUST** update
`torch/CMakeLists.txt` and add the file as a dependency to
the `add_custom_command` call.

View File

@ -0,0 +1,14 @@
def define_targets(rules):
rules.py_library(
name = "autograd",
srcs = rules.glob(["*.py"]),
data = rules.glob([
"*.yaml",
"templates/*",
]),
visibility = ["//:__subpackages__"],
deps = [
rules.requirement("PyYAML"),
"//torchgen",
],
)

View File

@ -0,0 +1,31 @@
import functools
from typing import Callable
from torchgen.api.autograd import NativeFunctionWithDifferentiabilityInfo as NFWDI
from torchgen.context import native_function_manager
from torchgen.utils import T
# Like tools.api.context.with_native_function, but for
# NativeFunctionWithDifferentiabilityInfo.
def with_native_function_with_differentiability_info(
func: Callable[[NFWDI], T]
) -> Callable[[NFWDI], T]:
@functools.wraps(func)
def wrapper(f: NFWDI) -> T:
with native_function_manager(f.func):
return func(f)
return wrapper
# Like the above but with an additional dispatch key string argument
def with_native_function_with_differentiability_info_and_key(
func: Callable[[NFWDI, str], T]
) -> Callable[[NFWDI, str], T]:
@functools.wraps(func)
def wrapper(f: NFWDI, key: str) -> T:
with native_function_manager(f.func):
return func(f, key)
return wrapper
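# A minimal usage sketch (the decorated function is illustrative):
#
#   @with_native_function_with_differentiability_info
#   def emit_body(fn: NFWDI) -> str:
#       # the native-function context for fn.func is active inside the wrapper
#       return f"// generated code for {fn.func.func.name}"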

View File

@ -0,0 +1,134 @@
# Deprecated function signatures. These are exposed in Python, but not included
# in the error message suggestions.
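# Each entry maps a deprecated signature (`name`) to the ATen call (`aten`) it
# dispatches to, re-ordering the arguments into the current schema order.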
- name: add(Tensor self, Scalar alpha, Tensor other) -> Tensor
aten: add(self, other, alpha)
- name: add_(Tensor(a!) self, Scalar alpha, Tensor other) -> Tensor(a!)
aten: add_(self, other, alpha)
- name: add(Tensor self, Scalar alpha, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
aten: add_out(out, self, other, alpha)
- name: addbmm(Scalar beta, Tensor self, Scalar alpha, Tensor batch1, Tensor batch2) -> Tensor
aten: addbmm(self, batch1, batch2, beta, alpha)
- name: addbmm_(Scalar beta, Tensor(a!) self, Scalar alpha, Tensor batch1, Tensor batch2) -> Tensor(a!)
aten: addbmm_(self, batch1, batch2, beta, alpha)
- name: addbmm(Scalar beta, Tensor self, Scalar alpha, Tensor batch1, Tensor batch2, *, Tensor(a!) out) -> Tensor(a!)
aten: addbmm_out(out, self, batch1, batch2, beta, alpha)
- name: addbmm(Scalar beta, Tensor self, Tensor batch1, Tensor batch2) -> Tensor
aten: addbmm(self, batch1, batch2, beta, 1)
- name: addbmm_(Scalar beta, Tensor(a!) self, Tensor batch1, Tensor batch2) -> Tensor(a!)
aten: addbmm_(self, batch1, batch2, beta, 1)
- name: addbmm(Scalar beta, Tensor self, Tensor batch1, Tensor batch2, *, Tensor(a!) out) -> Tensor(a!)
aten: addbmm_out(out, self, batch1, batch2, beta, 1)
- name: addcdiv(Tensor self, Scalar value, Tensor tensor1, Tensor tensor2) -> Tensor
aten: addcdiv(self, tensor1, tensor2, value)
- name: addcdiv_(Tensor(a!) self, Scalar value, Tensor tensor1, Tensor tensor2) -> Tensor(a!)
aten: addcdiv_(self, tensor1, tensor2, value)
- name: addcdiv(Tensor self, Scalar value, Tensor tensor1, Tensor tensor2, *, Tensor(a!) out) -> Tensor(a!)
aten: addcdiv_out(out, self, tensor1, tensor2, value)
- name: addcmul(Tensor self, Scalar value, Tensor tensor1, Tensor tensor2) -> Tensor
aten: addcmul(self, tensor1, tensor2, value)
- name: addcmul_(Tensor(a!) self, Scalar value, Tensor tensor1, Tensor tensor2) -> Tensor(a!)
aten: addcmul_(self, tensor1, tensor2, value)
- name: addcmul(Tensor self, Scalar value, Tensor tensor1, Tensor tensor2, *, Tensor(a!) out) -> Tensor(a!)
aten: addcmul_out(out, self, tensor1, tensor2, value)
- name: addmm(Scalar beta, Tensor self, Scalar alpha, Tensor mat1, Tensor mat2) -> Tensor
aten: addmm(self, mat1, mat2, beta, alpha)
- name: addmm_(Scalar beta, Tensor(a!) self, Scalar alpha, Tensor mat1, Tensor mat2) -> Tensor(a!)
aten: addmm_(self, mat1, mat2, beta, alpha)
- name: addmm(Scalar beta, Tensor self, Scalar alpha, Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
aten: addmm_out(out, self, mat1, mat2, beta, alpha)
- name: addmm(Scalar beta, Tensor self, Tensor mat1, Tensor mat2) -> Tensor
aten: addmm(self, mat1, mat2, beta, 1)
- name: addmm_(Scalar beta, Tensor(a!) self, Tensor mat1, Tensor mat2) -> Tensor(a!)
aten: addmm_(self, mat1, mat2, beta, 1)
- name: addmm(Scalar beta, Tensor self, Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
aten: addmm_out(out, self, mat1, mat2, beta, 1)
- name: sspaddmm(Scalar beta, Tensor self, Scalar alpha, Tensor mat1, Tensor mat2) -> Tensor
aten: sspaddmm(self, mat1, mat2, beta, alpha)
- name: sspaddmm(Scalar beta, Tensor self, Tensor mat1, Tensor mat2) -> Tensor
aten: sspaddmm(self, mat1, mat2, beta, 1)
- name: addmv(Scalar beta, Tensor self, Scalar alpha, Tensor mat, Tensor vec) -> Tensor
aten: addmv(self, mat, vec, beta, alpha)
- name: addmv_(Scalar beta, Tensor(a!) self, Scalar alpha, Tensor mat, Tensor vec) -> Tensor(a!)
aten: addmv_(self, mat, vec, beta, alpha)
- name: addmv(Scalar beta, Tensor self, Scalar alpha, Tensor mat, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
aten: addmv_out(out, self, mat, vec, beta, alpha)
- name: addmv(Scalar beta, Tensor self, Tensor mat, Tensor vec) -> Tensor
aten: addmv(self, mat, vec, beta, 1)
- name: addmv_(Scalar beta, Tensor(a!) self, Tensor mat, Tensor vec) -> Tensor(a!)
aten: addmv_(self, mat, vec, beta, 1)
- name: addmv(Scalar beta, Tensor self, Tensor mat, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
aten: addmv_out(out, self, mat, vec, beta, 1)
- name: addr(Scalar beta, Tensor self, Scalar alpha, Tensor vec1, Tensor vec2) -> Tensor
aten: addr(self, vec1, vec2, beta, alpha)
- name: addr_(Scalar beta, Tensor(a!) self, Scalar alpha, Tensor vec1, Tensor vec2) -> Tensor(a!)
aten: addr_(self, vec1, vec2, beta, alpha)
- name: addr(Scalar beta, Tensor self, Scalar alpha, Tensor vec1, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)
aten: addr_out(out, self, vec1, vec2, beta, alpha)
- name: addr(Scalar beta, Tensor self, Tensor vec1, Tensor vec2) -> Tensor
aten: addr(self, vec1, vec2, beta, 1)
- name: addr_(Scalar beta, Tensor(a!) self, Tensor vec1, Tensor vec2) -> Tensor(a!)
aten: addr_(self, vec1, vec2, beta, 1)
- name: addr(Scalar beta, Tensor self, Tensor vec1, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)
aten: addr_out(out, self, vec1, vec2, beta, 1)
- name: baddbmm(Scalar beta, Tensor self, Scalar alpha, Tensor batch1, Tensor batch2) -> Tensor
aten: baddbmm(self, batch1, batch2, beta, alpha)
- name: baddbmm_(Scalar beta, Tensor(a!) self, Scalar alpha, Tensor batch1, Tensor batch2) -> Tensor(a!)
aten: baddbmm_(self, batch1, batch2, beta, alpha)
- name: baddbmm(Scalar beta, Tensor self, Scalar alpha, Tensor batch1, Tensor batch2, *, Tensor(a!) out) -> Tensor(a!)
aten: baddbmm_out(out, self, batch1, batch2, beta, alpha)
- name: baddbmm(Scalar beta, Tensor self, Tensor batch1, Tensor batch2) -> Tensor
aten: baddbmm(self, batch1, batch2, beta, 1)
- name: baddbmm_(Scalar beta, Tensor(a!) self, Tensor batch1, Tensor batch2) -> Tensor(a!)
aten: baddbmm_(self, batch1, batch2, beta, 1)
- name: baddbmm(Scalar beta, Tensor self, Tensor batch1, Tensor batch2, *, Tensor(a!) out) -> Tensor(a!)
aten: baddbmm_out(out, self, batch1, batch2, beta, 1)
- name: sub(Tensor self, Scalar alpha, Tensor other) -> Tensor
aten: sub(self, other, alpha)
- name: sub_(Tensor(a!) self, Scalar alpha, Tensor other) -> Tensor(a!)
aten: sub_(self, other, alpha)
- name: sub(Tensor self, Scalar alpha, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
aten: sub_out(out, self, other, alpha)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,132 @@
"""
For procedural tests needed for __torch_function__, we use this function
to export method names and signatures as needed by the tests in
test/test_overrides.py.
python -m tools.autograd.gen_annotated_fn_args \
aten/src/ATen/native/native_functions.yaml \
aten/src/ATen/native/tags.yaml \
$OUTPUT_DIR \
tools/autograd
Where $OUTPUT_DIR is where you would like the files to be
generated. In the full build system, OUTPUT_DIR is
torch/testing/_internal/generated
"""
from __future__ import annotations
import argparse
import os
import textwrap
from collections import defaultdict
from typing import Any, Sequence, TYPE_CHECKING
import torchgen.api.python as python
from torchgen.context import with_native_function
from torchgen.gen import parse_native_yaml
from torchgen.utils import FileManager
from .gen_python_functions import (
is_py_fft_function,
is_py_linalg_function,
is_py_nn_function,
is_py_special_function,
is_py_torch_function,
is_py_variable_method,
should_generate_py_binding,
)
if TYPE_CHECKING:
from torchgen.model import Argument, BaseOperatorName, NativeFunction
def gen_annotated(
native_yaml_path: str, tags_yaml_path: str, out: str, autograd_dir: str
) -> None:
native_functions = parse_native_yaml(
native_yaml_path, tags_yaml_path
).native_functions
mappings = (
(is_py_torch_function, "torch._C._VariableFunctions"),
(is_py_nn_function, "torch._C._nn"),
(is_py_linalg_function, "torch._C._linalg"),
(is_py_special_function, "torch._C._special"),
(is_py_fft_function, "torch._C._fft"),
(is_py_variable_method, "torch.Tensor"),
)
annotated_args: list[str] = []
for pred, namespace in mappings:
groups: dict[BaseOperatorName, list[NativeFunction]] = defaultdict(list)
for f in native_functions:
if not should_generate_py_binding(f) or not pred(f):
continue
groups[f.func.name.name].append(f)
for group in groups.values():
for f in group:
annotated_args.append(f"{namespace}.{gen_annotated_args(f)}")
template_path = os.path.join(autograd_dir, "templates")
fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
fm.write_with_template(
"annotated_fn_args.py",
"annotated_fn_args.py.in",
lambda: {
"annotated_args": textwrap.indent("\n".join(annotated_args), " "),
},
)
@with_native_function
def gen_annotated_args(f: NativeFunction) -> str:
def _get_kwargs_func_exclusion_list() -> list[str]:
# functions that currently don't work with kwargs in test_overrides.py
return [
"diagonal",
"round_",
"round",
"scatter_",
]
def _add_out_arg(
out_args: list[dict[str, Any]], args: Sequence[Argument], *, is_kwarg_only: bool
) -> None:
for arg in args:
if arg.default is not None:
continue
out_arg: dict[str, Any] = {}
out_arg["is_kwarg_only"] = str(is_kwarg_only)
out_arg["name"] = arg.name
out_arg["simple_type"] = python.argument_type_str(
arg.type, simple_type=True
)
size_t = python.argument_type_size(arg.type)
if size_t:
out_arg["size"] = size_t
out_args.append(out_arg)
out_args: list[dict[str, Any]] = []
_add_out_arg(out_args, f.func.arguments.flat_positional, is_kwarg_only=False)
if f"{f.func.name.name}" not in _get_kwargs_func_exclusion_list():
_add_out_arg(out_args, f.func.arguments.flat_kwarg_only, is_kwarg_only=True)
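# Each generated entry looks roughly like (illustrative):
#   add: [{'is_kwarg_only': 'False', 'name': 'self', 'simple_type': 'Tensor'}, ...],
# and the caller prefixes it with its namespace (e.g. torch._C._VariableFunctions).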
return f"{f.func.name.name}: {repr(out_args)},"
def main() -> None:
parser = argparse.ArgumentParser(description="Generate annotated_fn_args script")
parser.add_argument(
"native_functions", metavar="NATIVE", help="path to native_functions.yaml"
)
parser.add_argument("tags", metavar="TAGS", help="path to tags.yaml")
parser.add_argument("out", metavar="OUT", help="path to output directory")
parser.add_argument(
"autograd", metavar="AUTOGRAD", help="path to template directory"
)
args = parser.parse_args()
gen_annotated(args.native_functions, args.tags, args.out, args.autograd)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,147 @@
"""
To run this file by hand from the root of the PyTorch
repository, run:
python -m tools.autograd.gen_autograd \
aten/src/ATen/native/native_functions.yaml \
aten/src/ATen/native/tags.yaml \
$OUTPUT_DIR \
tools/autograd
Where $OUTPUT_DIR is where you would like the files to be
generated. In the full build system, OUTPUT_DIR is
torch/csrc/autograd/generated/
"""
# gen_autograd.py generates C++ autograd functions and Python bindings.
#
# It delegates to the following scripts:
#
# gen_autograd_functions.py: generates subclasses of torch::autograd::Node
# gen_variable_type.py: generates VariableType.h which contains all tensor methods
# gen_python_functions.py: generates Python bindings to THPVariable
#
from __future__ import annotations
import argparse
import os
from torchgen.api import cpp
from torchgen.api.autograd import (
match_differentiability_info,
NativeFunctionWithDifferentiabilityInfo,
)
from torchgen.gen import parse_native_yaml
from torchgen.selective_build.selector import SelectiveBuilder
from . import gen_python_functions
from .gen_autograd_functions import (
gen_autograd_functions_lib,
gen_autograd_functions_python,
)
from .gen_inplace_or_view_type import gen_inplace_or_view_type
from .gen_trace_type import gen_trace_type
from .gen_variable_factories import gen_variable_factories
from .gen_variable_type import gen_variable_type
from .gen_view_funcs import gen_view_funcs
from .load_derivatives import load_derivatives
def gen_autograd(
native_functions_path: str,
tags_path: str,
out: str,
autograd_dir: str,
operator_selector: SelectiveBuilder,
disable_autograd: bool = False,
) -> None:
# Parse and load derivatives.yaml
differentiability_infos, used_dispatch_keys = load_derivatives(
os.path.join(autograd_dir, "derivatives.yaml"), native_functions_path, tags_path
)
template_path = os.path.join(autograd_dir, "templates")
native_funcs = parse_native_yaml(native_functions_path, tags_path).native_functions
fns = sorted(
filter(
operator_selector.is_native_function_selected_for_training, native_funcs
),
key=lambda f: cpp.name(f.func),
)
fns_with_diff_infos: list[
NativeFunctionWithDifferentiabilityInfo
] = match_differentiability_info(fns, differentiability_infos)
# Generate VariableType.h/cpp
if not disable_autograd:
gen_variable_type(
out,
native_functions_path,
tags_path,
fns_with_diff_infos,
template_path,
used_dispatch_keys,
)
gen_inplace_or_view_type(
out, native_functions_path, tags_path, fns_with_diff_infos, template_path
)
# operator filter not applied as tracing sources are excluded in selective build
gen_trace_type(out, native_funcs, template_path)
# Generate Functions.h/cpp
gen_autograd_functions_lib(out, differentiability_infos, template_path)
# Generate variable_factories.h
gen_variable_factories(out, native_functions_path, tags_path, template_path)
# Generate ViewFuncs.h/cpp
gen_view_funcs(out, fns_with_diff_infos, template_path)
def gen_autograd_python(
native_functions_path: str,
tags_path: str,
out: str,
autograd_dir: str,
) -> None:
differentiability_infos, _ = load_derivatives(
os.path.join(autograd_dir, "derivatives.yaml"), native_functions_path, tags_path
)
template_path = os.path.join(autograd_dir, "templates")
# Generate Functions.h/cpp
gen_autograd_functions_python(out, differentiability_infos, template_path)
# Generate Python bindings
deprecated_path = os.path.join(autograd_dir, "deprecated.yaml")
gen_python_functions.gen(
out, native_functions_path, tags_path, deprecated_path, template_path
)
def main() -> None:
parser = argparse.ArgumentParser(description="Generate autograd C++ files script")
parser.add_argument(
"native_functions", metavar="NATIVE", help="path to native_functions.yaml"
)
parser.add_argument("tags", metavar="NATIVE", help="path to tags.yaml")
parser.add_argument("out", metavar="OUT", help="path to output directory")
parser.add_argument(
"autograd", metavar="AUTOGRAD", help="path to autograd directory"
)
args = parser.parse_args()
gen_autograd(
args.native_functions,
args.tags,
args.out,
args.autograd,
SelectiveBuilder.get_nop_selector(),
)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,925 @@
# Generates C++ autograd functions for the derivatives of ATen operations
#
# This writes two files:
# Functions.h/cpp: subclasses of autograd::Node
# python_functions.h/cpp: Python bindings for the above classes
#
from __future__ import annotations
from typing import Sequence
from torchgen.api.autograd import (
Derivative,
DifferentiabilityInfo,
SavedAttribute,
uses_retain_variables,
uses_single_grad,
)
from torchgen.api.types import (
ArrayRefCType,
BaseCppType,
BaseCType,
Binding,
boolT,
doubleT,
intArrayRefT,
iTensorListRefT,
ListCType,
longT,
MutRefCType,
OptionalCType,
optionalIntArrayRefT,
optionalSymIntArrayRefT,
scalarT,
stringT,
symIntArrayRefT,
SymIntT,
TENSOR_LIST_LIKE_CTYPES,
tensorListT,
tensorT,
VectorCType,
)
from torchgen.code_template import CodeTemplate
from torchgen.model import Argument, FunctionSchema
from torchgen.utils import FileManager
from .gen_inplace_or_view_type import VIEW_FUNCTIONS
FUNCTION_DECLARATION = CodeTemplate(
"""\
#ifdef _WIN32
struct ${op} : public ${superclass} {
TORCH_API ${op}() = default;
#else
struct TORCH_API ${op} : public ${superclass} {
#endif
using ${superclass}::${superclass};
variable_list apply(variable_list&& grads) override;
std::string name() const override { return "${op}"; }
void release_variables() override {
${thread_lock}
${release_variables}
}
${will_release_variables}
void compiled_args(CompiledNodeArgs& args) override;
variable_list apply_with_saved(const variable_list& inputs, SwapSavedVariables& saved) override;
${saved_variables}
${saved_list_sizes}
};
"""
)
WILL_RELEASE_VARIABLES = CodeTemplate(
"""\
bool retain_variables = true;
void will_release_variables() override {
retain_variables = false;
}
"""
)
FUNCTION_DEFINITION = CodeTemplate(
"""\
variable_list ${op}::apply(variable_list&& grads) {
${thread_lock}
${asserts}
IndexRangeGenerator gen;
${compute_index_ranges}
variable_list grad_inputs(gen.size());
${body}
return grad_inputs;
}
void ${op}::compiled_args(CompiledNodeArgs& args) {
${compiled_args}
}
variable_list ${op}::apply_with_saved(const variable_list& grads, SwapSavedVariables& saved) {
${apply_with_saved_before}
variable_list result = apply(variable_list(grads));
${apply_with_saved_after}
return result;
}
"""
)
GRAD_INPUT_MASK = CodeTemplate(
"""\
auto grad_input_mask = std::array<bool, ${n}>{
${masks}
};\
"""
)
DERIVATIVE_SINGLE = CodeTemplate(
"""\
if (task_should_compute_output({ ${name}_ix })) {
auto grad_result = ${derivative};
copy_range(grad_inputs, ${name}_ix, grad_result);
}
"""
)
# note(crcrpar): The `self` argument and the other optional positional arguments
# of foreach functions are basically lists of n `Tensor`s, so we iterate over
# `grads` in order to apply the existing derivative definitions
# to each `Tensor` of `self` and the others.
DERIVATIVE_SINGLE_FOREACH = CodeTemplate(
"""\
if (task_should_compute_output({ ${name}_ix })) {
std::vector<Tensor> grad_result;
grad_result.reserve(grads.size());
for (const auto & i : c10::irange(grads.size())) {
if (grads[i].defined()) {
grad_result.emplace_back(${derivative});
} else {
grad_result.emplace_back(Tensor());
}
}
copy_range(grad_inputs, ${name}_ix, grad_result);
}
"""
)
DERIVATIVE_MULTI_COPY_RANGE = CodeTemplate(
"""\
if (task_should_compute_output({ ${name}_ix })) {
copy_range(grad_inputs, ${name}_ix, std::get<${i}>(grad_result));
}
"""
)
DERIVATIVE_MULTI = CodeTemplate(
"""\
if (task_should_compute_output({ ${idx_ranges} })) {
${grad_input_mask}
auto grad_result = ${derivative};
${copy_ranges}
}
"""
)
# Generates python bindings
#
# This generates the definitions for:
# (1) The PyTypeObject for each backward grad_fn subclassing Node
# (2) The entry for PyTypeObject's tp_getset slot (an array of PyGetSetDef structs)
# We generate one PyGetSetDef struct for each of grad_fn's saved inputs and outputs
# Each PyGetSetDef has a function ptr to a getter, also defined here (3).
# (3) Getters for each of grad_fn's saved inputs and outputs.
#
PY_FUNCTION_DEFINITION = CodeTemplate(
"""\
static PyTypeObject ${op}Class;
addClass<${op}>(module, ${op}Class, "${op}", ${op}_properties);
"""
)
PY_FUNCTION_PROPS_AND_GETTERS = CodeTemplate(
"""\
${all_getter_definitions}
static struct PyGetSetDef ${op}_properties[] = {
THP_FUNCTION_DEFAULT_PROPERTIES,
${all_getsetdef_structs}
{nullptr} /* sentinel */
};
"""
)
PY_GETSETDEF_STRUCT = CodeTemplate(
"""\
{(char*)"_saved_${name}", (getter)THP${op}_${name}_getter, nullptr, nullptr, nullptr}"""
)
PY_RAW_GETSETDEF_STRUCT = CodeTemplate(
"""\
{(char*)"_raw_saved_${name}", (getter)THP${op}_${name}_raw_getter, nullptr, nullptr, nullptr}"""
)
# Getter templates
GETTER_DEFINITION = CodeTemplate(
"""\
PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
HANDLE_TH_ERRORS
auto prop = static_cast<${op}*>(self->cdata.get())->${name};
${body}
END_HANDLE_TH_ERRORS
}
"""
)
GETTER_DEFINITION_SAVEDVAR = CodeTemplate(
"""\
PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
HANDLE_TH_ERRORS
const auto& prop = static_cast<${op}*>(self->cdata.get())->${name}_;
${body}
END_HANDLE_TH_ERRORS
}
"""
)
GETTER_DEFINITION_RAW_SAVEDVAR = CodeTemplate(
"""\
PyObject* THP${op}_${name}_raw_getter(THPCppFunction *self, void *_unused) {
HANDLE_TH_ERRORS
const auto& prop = static_cast<${op}*>(self->cdata.get())->${name}_;
${body}
END_HANDLE_TH_ERRORS
}
"""
)
GETTER_DEFINITION_VEC_SAVEDVAR = CodeTemplate(
"""\
PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
HANDLE_TH_ERRORS
const auto *node = static_cast<${op}*>(self->cdata.get());
const auto& prop = node->${name}_;
if (node->${name}_released_) {
PyErr_SetString(PyExc_RuntimeError, ERR_BACKWARD_TWICE);
return nullptr;
}
${body}
END_HANDLE_TH_ERRORS
}
"""
)
GETTER_DEFINITION_RAW_VEC_SAVEDVAR = CodeTemplate(
"""\
PyObject* THP${op}_${name}_raw_getter(THPCppFunction *self, void *_unused) {
HANDLE_TH_ERRORS
const auto *node = static_cast<${op}*>(self->cdata.get());
const auto& prop = node->${name}_;
if (node->${name}_released_) {
PyErr_SetString(PyExc_RuntimeError, ERR_BACKWARD_TWICE);
return nullptr;
}
${body}
END_HANDLE_TH_ERRORS
}
"""
)
GETTER_DEFINITION_OPT = CodeTemplate(
"""\
PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
HANDLE_TH_ERRORS
auto opt_prop = static_cast<${op}*>(self->cdata.get())->${name};
if (!opt_prop.has_value()) {
Py_RETURN_NONE;
}
auto prop = opt_prop.value();
${body}
END_HANDLE_TH_ERRORS
}
"""
)
GETTER_DEFINITION_OPT_ARRAYREF = CodeTemplate(
"""\
PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
HANDLE_TH_ERRORS
auto opt_prop = static_cast<${op}*>(self->cdata.get())->${name};
if (!opt_prop.list.has_value()) {
Py_RETURN_NONE;
}
auto prop = opt_prop.list.value();
${body}
END_HANDLE_TH_ERRORS
}
"""
)
# Getter body
GETTER_BODY_SAVEDVAR = """\
return THPVariable_Wrap(prop.unpack(self->cdata));
"""
GETTER_BODY_RAW_SAVEDVAR = """\
pybind11::object obj = pybind11::cast(prop, pybind11::return_value_policy::reference);
return obj.release().ptr();
"""
GETTER_BODY_VEC_SAVEDVAR = """\
PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
for (auto i: c10::irange(prop.size())) {
PyTuple_SetItem(tup, (Py_ssize_t) i, THPVariable_Wrap(prop[i].unpack(self->cdata)));
}
return tup;
"""
GETTER_BODY_RAW_VEC_SAVEDVAR = """\
PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
for (auto i : c10::irange(prop.size())) {
pybind11::object obj = pybind11::cast(prop[i], pybind11::return_value_policy::reference);
PyTuple_SetItem(tup, (Py_ssize_t) i, obj.release().ptr());
}
return tup;
"""
GETTER_BODY_ARRAYREF_LONG = """\
PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
for (auto i : c10::irange(prop.size())) {
PyTuple_SetItem(tup, (Py_ssize_t) i, PyLong_FromUnsignedLong((uint64_t) prop[i]));
}
return tup;
"""
GETTER_BODY_ARRAYREF_SYMINT = """\
PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
for (auto i : c10::irange(prop.size())) {
auto si = prop[i];
if (auto m = si.maybe_as_int()) {
PyTuple_SetItem(tup, (Py_ssize_t) i, PyLong_FromUnsignedLong(*m));
} else {
auto py_symint = py::cast(si).release().ptr();
PyTuple_SetItem(tup, (Py_ssize_t) i, py_symint);
}
}
return tup;
"""
GETTER_BODY_ARRAYREF_DOUBLE = """\
PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
for (auto i : c10::irange(prop.size())) {
PyTuple_SetItem(tup, (Py_ssize_t) i, PyFloat_FromDouble((double) prop[i]));
}
return tup;
"""
GETTER_BODY_INT64_T = """\
return PyLong_FromUnsignedLong((int64_t) prop);
"""
GETTER_BODY_SYMINT = """\
if (auto m = prop.maybe_as_int()) {
return PyLong_FromUnsignedLong(*m);
} else {
return py::cast(prop).release().ptr();
}
"""
GETTER_BODY_DOUBLE = """\
return PyFloat_FromDouble((double) prop);
"""
GETTER_BODY_BOOL = """\
if (prop) {
Py_RETURN_TRUE;
} else {
Py_RETURN_FALSE;
}
"""
GETTER_BODY_STRING = """\
return PyUnicode_FromStringAndSize(prop.data(), prop.size());
"""
GETTER_BODY_SCALAR = """\
if (prop.isComplex()) {
auto cprop = prop.to<c10::complex<double>>();
return PyComplex_FromDoubles(cprop.real(), cprop.imag());
} else if (prop.isFloatingPoint()) {
return PyFloat_FromDouble(prop.to<double>());
} else if (prop.isIntegral(/*includeBool=*/false)) {
return PyLong_FromLong(prop.to<int64_t>());
} else if (prop.isBoolean()) {
if (prop.to<bool>()) {
Py_RETURN_TRUE;
} else {
Py_RETURN_FALSE;
}
} else {
PyErr_SetString(PyExc_RuntimeError, "Unknown scalar type");
return nullptr;
}
"""
GETTER_BODY_VEC_SCALAR = """\
PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
for (auto i: c10::irange(prop.size())) {
if (prop[i].isComplex()) {
auto cprop = prop[i].to<c10::complex<double>>();
PyTuple_SetItem(tup, (Py_ssize_t) i, PyComplex_FromDoubles(cprop.real(), cprop.imag()));
} else if (prop[i].isFloatingPoint()) {
auto double_prop = prop[i].to<double>();
PyTuple_SetItem(tup, (Py_ssize_t) i, PyFloat_FromDouble(double_prop));
} else if (prop[i].isIntegral(/*includeBool=*/false)) {
auto long_prop = prop[i].to<int64_t>();
PyTuple_SetItem(tup, (Py_ssize_t) i, PyLong_FromLong(long_prop));
} else if (prop[i].isBoolean()) {
if (prop[i].to<bool>()) {
PyTuple_SetItem(tup, (Py_ssize_t) i, Py_True);
} else {
PyTuple_SetItem(tup, (Py_ssize_t) i, Py_False);
}
} else {
PyErr_SetString(PyExc_RuntimeError, "Unknown scalar type");
return nullptr;
}
}
return tup;
"""
MISC_GETTER_DEFS = {
OptionalCType(BaseCType(longT)): (GETTER_DEFINITION_OPT, GETTER_BODY_INT64_T),
OptionalCType(BaseCType(SymIntT)): (GETTER_DEFINITION_OPT, GETTER_BODY_SYMINT),
BaseCType(doubleT): (GETTER_DEFINITION, GETTER_BODY_DOUBLE),
OptionalCType(BaseCType(doubleT)): (GETTER_DEFINITION_OPT, GETTER_BODY_DOUBLE),
BaseCType(boolT): (GETTER_DEFINITION, GETTER_BODY_BOOL),
BaseCType(scalarT): (GETTER_DEFINITION, GETTER_BODY_SCALAR),
OptionalCType(BaseCType(scalarT)): (GETTER_DEFINITION_OPT, GETTER_BODY_SCALAR),
}
# These functions have backwards which cannot be traced, and so must have
# their backward functions traced opaquely.
# VIEW_FUNCTIONS are not traceable because they use as_strided, which
# has an untraceable backwards, see
# https://github.com/pytorch/pytorch/issues/4250
# TODO: This is probably not exhaustive, but it's a start
UNTRACEABLE_FUNCTIONS = VIEW_FUNCTIONS
def get_infos_with_derivatives_list(
differentiability_infos: dict[FunctionSchema, dict[str, DifferentiabilityInfo]]
) -> list[DifferentiabilityInfo]:
diff_info_list = [
info
for diffinfo_dict in differentiability_infos.values()
for info in diffinfo_dict.values()
]
return list(filter(lambda info: info.args_with_derivatives, diff_info_list))
def gen_autograd_functions_lib(
out: str,
differentiability_infos: dict[FunctionSchema, dict[str, DifferentiabilityInfo]],
template_path: str,
) -> None:
"""Functions.h and Functions.cpp body
These contain the auto-generated subclasses of torch::autograd::Node
for every differentiable torch function.
"""
# get a 1D list of diffinfos, we do not need them to be per FunctionSchema/DispatchKey here
# infos with the diff dispatchkeys but the same name will still be in the same shard.
infos = get_infos_with_derivatives_list(differentiability_infos)
declarations = [process_function(f, FUNCTION_DECLARATION) for f in infos]
definitions = [process_function(f, FUNCTION_DEFINITION) for f in infos]
file_basename = "Functions"
fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
for suffix in [".h", ".cpp"]:
fname = file_basename + suffix
fm.write_with_template(
fname,
fname,
lambda: {
"generated_comment": "@"
+ f"generated from {fm.template_dir_for_comments()}/"
+ fname,
"autograd_function_declarations": declarations,
"autograd_function_definitions": definitions,
},
)
def gen_autograd_functions_python(
out: str,
differentiability_infos: dict[FunctionSchema, dict[str, DifferentiabilityInfo]],
template_path: str,
) -> None:
fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
num_shards = 5
fm.write(
"python_functions.h",
lambda: {
"generated_comment": "@"
+ f"generated from {fm.template_dir_for_comments()}/python_functions.h",
"shard_forward_declare": [
f"void initialize_autogenerated_functions_{i}(PyObject* module);"
for i in range(num_shards)
],
"shard_call": [
f"initialize_autogenerated_functions_{i}(module);"
for i in range(num_shards)
],
},
)
# get a 1D list of diffinfos, we do not need them to be per FunctionSchema/DispatchKey here
# infos with the diff dispatchkeys but the same name will still be in the same shard.
infos = get_infos_with_derivatives_list(differentiability_infos)
fm.write_sharded(
"python_functions.cpp",
infos,
key_fn=lambda info: info.name,
base_env={
"generated_comment": "@"
+ f"generated from {fm.template_dir_for_comments()}/python_functions.cpp",
},
env_callable=lambda info: {
"py_function_initializers": [
process_function(info, PY_FUNCTION_DEFINITION)
],
"py_function_props_and_getters": [
process_function(info, PY_FUNCTION_PROPS_AND_GETTERS)
],
},
num_shards=num_shards,
sharded_keys={"py_function_initializers", "py_function_props_and_getters"},
)
def process_function(info: DifferentiabilityInfo, template: CodeTemplate) -> str:
saved_variables: list[str] = []
release_variables: list[str] = []
saved_list_sizes: list[str] = []
unpack: list[str] = []
asserts: list[str] = []
compute_index_ranges: list[str] = []
getter_definitions: list[str] = []
py_getsetdef_structs: list[str] = []
compiled_args: list[str] = []
apply_with_saved_before: list[str] = []
apply_with_saved_after: list[str] = []
for arg in info.args_with_derivatives:
if arg.type in TENSOR_LIST_LIKE_CTYPES:
size = f"{arg.name}_size_"
saved_list_sizes.append(f"size_t {arg.name}_size_;")
else:
size = "1"
compute_index_ranges.append(f"auto {arg.name}_ix = gen.range({size});")
def save_var(var: SavedAttribute, is_output: bool) -> None:
name = var.nctype.name
type = var.nctype.type
should_append_getsetdef = True
should_append_raw_getsetdef = False
visit_name = name
uses_cpp_saved_variable_cls = False
if (
type == BaseCType(tensorT)
or type == OptionalCType(BaseCType(tensorT))
or type == MutRefCType(OptionalCType(BaseCType(tensorT)))
or (type == BaseCType(scalarT) and is_output)
):
uses_cpp_saved_variable_cls = True
saved_variables.append(f"SavedVariable {name}_;")
release_variables.append(f"{name}_.reset_data();")
ptr = "shared_from_this()" if is_output else ""
unpack.append(f"auto {name} = {name}_.unpack({ptr});")
getter_definitions.append(
GETTER_DEFINITION_SAVEDVAR.substitute(
op=info.op, name=name, body=GETTER_BODY_SAVEDVAR
)
)
getter_definitions.append(
GETTER_DEFINITION_RAW_SAVEDVAR.substitute(
op=info.op, name=name, body=GETTER_BODY_RAW_SAVEDVAR
)
)
should_append_raw_getsetdef = True
visit_name = f"{name}_"
elif (
type == BaseCType(tensorListT)
or type == BaseCType(iTensorListRefT)
or type == VectorCType(BaseCType(tensorT))
):
# note(crcrpar): [nuanced return type of out-of-place foreach functions]
# When an out-of-place foreach function whose return signature is `Tensor[]`
# spells out its backward definitions in `derivatives.yaml`, and some of them depend on
# `result`, `result`'s type is interpreted and treated as `std::vector<Tensor>`.
# An out-of-place foreach whose backwards rely on their output doesn't suffer from this
# difference if the definitions are codegen'ed.
# This special case is needed for `_foreach_pow.List` and `_foreach_pow.ScalarAndTensor`
# as of https://github.com/pytorch/pytorch/pull/105504.
if type == VectorCType(BaseCType(tensorT)):
assert (
info.func.func.name.name.base.startswith("_foreach") and is_output
)
uses_cpp_saved_variable_cls = True
saved_variables.append(f"std::vector<SavedVariable> {name}_;")
saved_variables.append(f"bool {name}_released_ = false;")
# Just clear() is sufficient, we don't need to loop and clear each variable.
# Because the SavedVariable owns a tensor and a grad_fn, removing the SavedVariable makes them go away as well.
release_variables.append(f"{name}_.clear();")
release_variables.append(f"{name}_released_ = true;")
ptr = "shared_from_this()" if is_output else "nullptr"
unpack.append(f"auto {name} = unpack_list({name}_, {ptr});")
asserts.append(f"TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);")
getter_definitions.append(
GETTER_DEFINITION_VEC_SAVEDVAR.substitute(
op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR
)
)
getter_definitions.append(
GETTER_DEFINITION_RAW_VEC_SAVEDVAR.substitute(
op=info.op, name=name, body=GETTER_BODY_RAW_VEC_SAVEDVAR
)
)
should_append_raw_getsetdef = True
visit_name = f"{name}_"
elif type == ListCType(OptionalCType(BaseCType(tensorT))):
uses_cpp_saved_variable_cls = True
saved_variables.append(f"std::vector<SavedVariable> {name}_;")
saved_variables.append(f"bool {name}_released_ = false;")
# Just clear() is sufficient, we don't need to loop and clear each variable.
# Because the SavedVariable owns a tensor and a grad_fn, removing the SavedVariable makes them go away as well.
release_variables.append(f"{name}_.clear();")
release_variables.append(f"{name}_released_ = true;")
unpack.append(f"auto {name} = unpack_opt_list({name}_);")
asserts.append(f"TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);")
getter_definitions.append(
GETTER_DEFINITION_VEC_SAVEDVAR.substitute(
op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR
)
)
getter_definitions.append(
GETTER_DEFINITION_RAW_VEC_SAVEDVAR.substitute(
op=info.op, name=name, body=GETTER_BODY_RAW_VEC_SAVEDVAR
)
)
should_append_raw_getsetdef = True
visit_name = f"{name}_"
elif type == BaseCType(intArrayRefT):
saved_variables.append(f"std::vector<int64_t> {name};")
getter_definitions.append(
GETTER_DEFINITION.substitute(
op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG
)
)
elif type == BaseCType(symIntArrayRefT):
saved_variables.append(f"std::vector<c10::SymInt> {name};")
getter_definitions.append(
GETTER_DEFINITION.substitute(
op=info.op, name=name, body=GETTER_BODY_ARRAYREF_SYMINT
)
)
elif type == BaseCType(optionalIntArrayRefT):
saved_variables.append(f"c10::OptionalArray<int64_t> {name};")
getter_definitions.append(
GETTER_DEFINITION_OPT_ARRAYREF.substitute(
op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG
)
)
elif type == BaseCType(optionalSymIntArrayRefT):
saved_variables.append(f"c10::OptionalArray<c10::SymInt> {name};")
getter_definitions.append(
GETTER_DEFINITION_OPT_ARRAYREF.substitute(
op=info.op, name=name, body=GETTER_BODY_ARRAYREF_SYMINT
)
)
elif type == OptionalCType(BaseCType(intArrayRefT)):
saved_variables.append(f"c10::OptionalArray<int64_t> {name};")
getter_definitions.append(
GETTER_DEFINITION_OPT_ARRAYREF.substitute(
op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG
)
)
elif type == OptionalCType(BaseCType(symIntArrayRefT)):
saved_variables.append(f"c10::OptionalArray<c10::SymInt> {name};")
getter_definitions.append(
GETTER_DEFINITION_OPT_ARRAYREF.substitute(
op=info.op, name=name, body=GETTER_BODY_ARRAYREF_SYMINT
)
)
elif type == OptionalCType(ArrayRefCType(BaseCType(doubleT))):
saved_variables.append(f"c10::OptionalArray<double> {name};")
getter_definitions.append(
GETTER_DEFINITION_OPT_ARRAYREF.substitute(
op=info.op, name=name, body=GETTER_BODY_ARRAYREF_DOUBLE
)
)
elif type == BaseCType(longT):
saved_variables.append(f"{type.cpp_type()} {name} = 0;")
getter_definitions.append(
GETTER_DEFINITION.substitute(
op=info.op, name=name, body=GETTER_BODY_INT64_T
)
)
elif type == BaseCType(SymIntT):
saved_variables.append(f"c10::SymInt {name};")
getter_definitions.append(
GETTER_DEFINITION.substitute(
op=info.op, name=name, body=GETTER_BODY_SYMINT
)
)
elif type == BaseCType(stringT):
saved_variables.append(f"std::string {name};")
getter_definitions.append(
GETTER_DEFINITION.substitute(
op=info.op, name=name, body=GETTER_BODY_STRING
)
)
elif type == OptionalCType(BaseCType(stringT)):
saved_variables.append(f"std::optional<std::string> {name};")
getter_definitions.append(
GETTER_DEFINITION_OPT.substitute(
op=info.op, name=name, body=GETTER_BODY_STRING
)
)
elif type == ArrayRefCType(
elem=BaseCType(type=BaseCppType(ns="at", name="Scalar"))
):
saved_variables.append(f"std::vector<at::Scalar> {name};")
saved_variables.append(f"bool {name}_released_ = false;")
# Just clear() is sufficient, we don't need to loop and clear each variable.
# Because the SavedVariable owns a tensor and a grad_fn, removing the SavedVariable makes them go away as well.
release_variables.append(f"{name}.clear();")
# release_variables.append(f"{name}_released_ = true;")
# unpack.append(f"auto {name} = unpack_list({name}_);")
# asserts.append(f"TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);")
getter_definitions.append(
CodeTemplate(
"""\
PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
HANDLE_TH_ERRORS
const auto *node = static_cast<${op}*>(self->cdata.get());
const auto& prop = node->${name};
if (node->${name}_released_) {
PyErr_SetString(PyExc_RuntimeError, ERR_BACKWARD_TWICE);
return nullptr;
}
${body}
END_HANDLE_TH_ERRORS
}
"""
).substitute(
op=info.op,
name=name,
body=GETTER_BODY_VEC_SCALAR,
)
)
else:
# Check for indicators that you're putting a non-owning reference
# into the saved variable field. If this is spuriously firing,
# edit this field. Otherwise, you probably need to add a case
# above.
assert (
"ref" not in type.cpp_type().lower()
and "view" not in type.cpp_type().lower()
and "*" not in type.cpp_type()
and "&" not in type.cpp_type()
), f"{type.cpp_type()} looks like it contains a non-owning reference"
saved_variables.append(f"{type.cpp_type()} {name};")
if type in MISC_GETTER_DEFS:
getter_def, body = MISC_GETTER_DEFS[type]
getter_definitions.append(
getter_def.substitute(op=info.op, name=name, body=body)
)
else:
# Types we don't expose python bindings to yet:
# TypeAndSize, at::ScalarType, TensorOptions, TensorGeometry,
# std::vector<std::vector<int64_t>>, std::vector<at::ScalarType>
should_append_getsetdef = False
if should_append_getsetdef:
py_getsetdef_structs.append(
PY_GETSETDEF_STRUCT.substitute(op=info.op, name=name)
)
if should_append_raw_getsetdef:
py_getsetdef_structs.append(
PY_RAW_GETSETDEF_STRUCT.substitute(op=info.op, name=name)
)
if uses_cpp_saved_variable_cls:
compiled_args.append(
f"args.collect({visit_name}, {'true' if is_output else 'false'});"
)
else:
compiled_args.append(f"args.collect({visit_name});")
apply_with_saved_before.append(f"saved.before({visit_name});")
apply_with_saved_after.append(f"saved.after({visit_name});")
for var in sorted(info.all_saved_inputs, key=lambda sa: str(sa.nctype.name)):
save_var(var, is_output=False)
for var in sorted(info.all_saved_outputs, key=lambda sa: str(sa.nctype.name)):
save_var(var, is_output=True)
# lock the mutex when we release variables and in Node::apply to protect thread safety
# see Note [Thread Safety on Autograd Node]
if len(release_variables) > 0:
thread_lock = "std::lock_guard<std::mutex> lock(mutex_);"
else:
thread_lock = ""
if uses_retain_variables(info):
will_release_variables = WILL_RELEASE_VARIABLES.substitute()
else:
will_release_variables = ""
body: list[str] = []
if uses_single_grad(info):
body.append("const auto& grad = grads[0];")
else:
# Generate aliases for gradients named for returned values.
body.extend(
f"const auto& {name} = grads[{info.available_named_gradients.index(name)}];"
for name in sorted(info.used_named_gradients)
)
def emit_derivative(
derivative: Derivative,
args_with_derivatives: Sequence[Binding],
) -> tuple[bool, str]:
formula = derivative.formula
var_names = derivative.var_names
if len(var_names) == 1:
checks_any_grad_defined = False
if "not_implemented" not in formula:
matching_args = [
arg for arg in args_with_derivatives if arg.name == var_names[0]
]
if len(matching_args) == 1:
# We can add undefined grad support if the input variable is a Tensor
arg = matching_args[0]
if isinstance(arg.argument, Argument) and str(
arg.argument.type
) in ("Tensor", "Tensor?"):
formula = "any_grad_defined ? (" + formula + ") : Tensor()"
checks_any_grad_defined = True
if info.name.startswith("_foreach_"):
derivative_template = DERIVATIVE_SINGLE_FOREACH
else:
derivative_template = DERIVATIVE_SINGLE
return (
checks_any_grad_defined,
derivative_template.substitute(name=var_names[0], derivative=formula),
)
else:
if "grad_input_mask" in formula:
masks = [
f"task_should_compute_output({{ {n}_ix }})," for n in var_names
]
grad_input_mask = GRAD_INPUT_MASK.substitute(
masks=masks, n=len(var_names)
)
else:
grad_input_mask = ""
idx_ranges = ", ".join(f"{n}_ix" for n in var_names)
copy_ranges: list[str] = []
for i, n in enumerate(var_names):
copy_ranges.append(DERIVATIVE_MULTI_COPY_RANGE.substitute(name=n, i=i))
return False, DERIVATIVE_MULTI.substitute(
idx_ranges=idx_ranges,
copy_ranges=copy_ranges,
derivative=formula,
grad_input_mask=grad_input_mask,
)
body.extend(unpack)
need_any_grad_defined_var = False
for derivative in info.derivatives:
checks_any_grad_defined, derivative_text = emit_derivative(
derivative, info.args_with_derivatives
)
body.append(derivative_text)
need_any_grad_defined_var |= checks_any_grad_defined
# Since single-output derivative formulas need to check if grads are
# defined, only perform the check once, before all the formulas
if need_any_grad_defined_var:
body.insert(
-len(info.derivatives),
"bool any_grad_defined = any_variable_defined(grads);",
)
if info.name in UNTRACEABLE_FUNCTIONS:
superclass = "Node"
else:
superclass = "TraceableFunction"
all_getsetdef_structs = (
",\n".join(py_getsetdef_structs) + "," if len(py_getsetdef_structs) != 0 else ""
)
all_getter_definitions = "\n".join(getter_definitions)
return template.substitute(
op=info.op,
compute_index_ranges=compute_index_ranges,
saved_variables=saved_variables,
release_variables=release_variables,
saved_list_sizes=saved_list_sizes,
asserts=asserts,
thread_lock=thread_lock,
will_release_variables=will_release_variables,
body=body,
superclass=superclass,
all_getter_definitions=all_getter_definitions,
all_getsetdef_structs=all_getsetdef_structs,
compiled_args=compiled_args,
apply_with_saved_before=apply_with_saved_before,
apply_with_saved_after=apply_with_saved_after,
)

View File

@ -0,0 +1,675 @@
# Generates ADInplaceOrViewType.h/cpp
#
# NOTE: If any changes are being made to the ADInplaceOrView codegen please also check
# if updates are needed in torch/csrc/autograd/autograd_not_implemented_fallback.cpp
# The fallback is expected to mimic this codegen, so we should keep the two in sync.
from __future__ import annotations
from torchgen.api import cpp
from torchgen.api.autograd import (
dispatch_strategy,
gen_differentiable_outputs,
NativeFunctionWithDifferentiabilityInfo,
)
from torchgen.api.types import (
BaseCType,
Binding,
boolT,
ConstRefCType,
CType,
DispatcherSignature,
intArrayRefT,
longT,
OptionalCType,
symIntArrayRefT,
SymIntT,
tensorT,
)
from torchgen.code_template import CodeTemplate
from torchgen.context import with_native_function
from torchgen.model import (
NativeFunction,
SchemaKind,
SelfArgument,
TensorOptionsArguments,
Type,
)
from torchgen.utils import FileManager
from .context import with_native_function_with_differentiability_info
from .gen_trace_type import (
get_return_value,
MANUAL_AUTOGRAD,
tie_return_values,
type_wrapper_name,
)
# See NOTE [ Autograd View Variables ] in variable.h for details.
# If you update the list VIEW_FUNCTIONS or RETURNS_VIEWS_OF_INPUT,
# you **MUST** also update the public list of view ops accordingly in
# docs/source/tensor_view.rst. Note that not all ATen functions are exposed to
# the public, e.g. alias & sparse_coo_tensor_with_dims_and_tensors.
#
# A map: function name => name of the argument that all outputs are view of
VIEW_FUNCTIONS_WITH_METADATA_CHANGE = [
"view_as_complex",
"view_as_real",
"_conj",
"_neg_view",
"_nested_get_values",
"_nested_view_from_buffer",
"_nested_view_from_jagged",
]
VIEW_FUNCTIONS = {
"numpy_T": "self",
"alias": "self",
"as_strided": "self",
"diagonal": "self",
"expand": "self",
"permute": "self",
"select": "self",
"slice": "self",
"slice_inverse": "self",
"split": "self",
"split_with_sizes": "self",
"squeeze": "self",
"t": "self",
"transpose": "self",
"unfold": "self",
"unsqueeze": "self",
"flatten": "self",
"view": "self",
"unbind": "self",
"_indices": "self",
"_values": "self",
"indices": "self",
"values": "self",
"crow_indices": "self",
"col_indices": "self",
"ccol_indices": "self",
"row_indices": "self",
# sparse_coo ctor output should really be views of both indices and values,
# but we only support making a view of a single variable, and indices is
# discrete anyway.
# FIXME: clone indices on construction.
"sparse_coo_tensor_with_dims_and_tensors": "values",
"_reshape_alias": "self",
"_test_autograd_multiple_dispatch_view": "self",
}
for key in VIEW_FUNCTIONS_WITH_METADATA_CHANGE:
VIEW_FUNCTIONS[key] = "self"
# Note: some of the functions below are just compositions of the view functions
# above. This list contains both the root view functions and any that are
# purely composed of viewing functions; it is used by the JIT to determine when
# an operator may return a view of its inputs (though it may sometimes return a
# copy instead, e.g. `contiguous`).
RETURNS_VIEWS_OF_INPUT = set(VIEW_FUNCTIONS.keys()).union(
{
"chunk",
"detach",
"contiguous",
"reshape",
"reshape_as",
"expand_as",
"view_as",
"real",
"imag",
"narrow",
"movedim",
"tensor_split",
"swapdims",
"swapaxes",
"mT",
"mH",
"adjoint",
"matrix_H",
}
)
# These are the functions we consider views for the purposes of validating
# StorageImpl and TensorImpl in gen_variable_type.
# `_unsafe_view` is not included in VIEW_FUNCTIONS above because it is not a
# view for the purposes of the ADInplaceOrView kernel, so we do not want to
# call as_view on it. See NOTE [Unsafe View] for more info.
ALL_VIEW_FUNCTIONS = {
**VIEW_FUNCTIONS,
"_unsafe_view": "self",
}
ARRAYREF_TO_VEC = CodeTemplate(
"""\
auto ${vec} = ${arg}.vec();
"""
)
OPTIONAL_TO_VAL = CodeTemplate(
"""\
auto ${val} = ${arg}.value_or(${default});
"""
)
CALL_DISPATCH = CodeTemplate(
"""\
at::_ops::${unambiguous_name}::call(${unpacked_args})"""
)
REVERSE_VIEW_DISPATCH = CodeTemplate(
"""\
${reverse_name}(${unpacked_args})"""
)
MULTI_OUTPUT_VIEW_ITERATION = CodeTemplate(
"""\
for (auto ${view_idx} : c10::irange(${var}.size())) {
${body}
}
"""
)
SETUP_REPLAY_VIEW_IF_NOT_SUPPORT_AS_STRIDED_OR_VIEW_WITH_METADATA_CHANGE = CodeTemplate(
"""\
std::unique_ptr<torch::autograd::ViewFunc> func(nullptr);
std::function<at::Tensor(const at::Tensor&)> rev_func=nullptr;
if (${is_view_with_metadata_change} ||
!self.unsafeGetTensorImpl()->support_as_strided() ||
self.unsafeGetTensorImpl()->is_python_dispatch() ||
c10::AutogradState::get_tls_state().get_view_replay_enabled()) {
${replay_view_func}
${reverse_replay_view_func}
}
"""
)
REPLAY_VIEW_FUNC = CodeTemplate(
"""\
func = std::make_unique<${view_func_name}>(${view_func_args});
"""
)
REVERSE_REPLAY_VIEW_LAMBDA_FUNC = CodeTemplate(
"""\
rev_func = [=](const at::Tensor& ${input_view}) {
return ${reverse_replay_view_call};
};
"""
)
METHOD_DEFINITION = CodeTemplate(
"""\
${return_type} ${type_wrapper_name}(${formals}) {
${type_definition_body}
}
"""
)
WRAPPER_REGISTRATION = CodeTemplate(
"""\
m.impl("${unqual_operator_name_with_overload}",
TORCH_FN(${class_type}::${type_wrapper_name})
);
"""
)
AUTOGRAD_NOT_IMPLEMENTED_REGISTRATION = CodeTemplate(
"""\
m.impl("${unqual_operator_name_with_overload}", torch::autograd::autogradNotImplementedFallback());
"""
)
INPLACE_REDISPATCH = CodeTemplate(
"""\
{
at::AutoDispatchBelowADInplaceOrView guard;
at::_ops::${unambiguous_name}::redispatch(${unpacked_args});
}
"""
)
ASSIGN_RETURN_VALUE = CodeTemplate(
"""\
${return_values} = ${rhs_value};
"""
)
VIEW_REDISPATCH = CodeTemplate(
"""\
${assign_return_values} ([&]() {
at::AutoDispatchBelowADInplaceOrView guard;
return at::_ops::${unambiguous_name}::redispatch(${unpacked_args});
})();
"""
)
TMP_VAR = "_tmp"
# FIXME: Ideally these functions should be methods on the Type class, but there
# is a comment in codegen/model.py saying these concepts are not well defined.
# Thus we put a version that is commonly used by autograd codegen here.
def is_tensor_type(t: Type) -> bool:
# TODO: Should handle optional here?
return t.is_tensor_like() and t.is_list_like() is None
def is_tensor_list_type(t: Type) -> bool:
# TODO: Should handle optional here?
return t.is_tensor_like() and t.is_list_like() is not None
UNPACK_TENSOR = CodeTemplate(
"""\
auto${ref} ${arg_name}_ = unpack${suffix}(${arg_name}, "${arg_name}", ${arg_pos});"""
)
def unpacked_name(arg_name: str) -> str:
return arg_name + "_"
# e.g. select.int -> select_copy_int_inverse()
def inverse_view_name(f: NativeFunction) -> str:
copy_variant = f"{f.root_name}_copy"
overload = f"{f.func.name.overload_name}"
if overload != "":
overload = "_" + overload
return f"{copy_variant}{overload}_inverse"
def extract_bindings(f: NativeFunction) -> list[Binding]:
return [
r
for a in f.func.schema_order_arguments()
for r in cpp.argument(
a,
method=False,
symint=True,
cpp_no_default_args=set(),
faithful=False,
has_tensor_options=False,
)
]
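# Emits the `unpack*` statements for a kernel's tensor arguments and returns
# those lines together with the bindings the kernel body should use: tensor-like,
# non-nullable arguments are replaced by their unpacked `<name>_` counterparts,
# everything else is passed through unchanged.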
@with_native_function
def unpack_args(f: NativeFunction) -> tuple[list[str], list[Binding]]:
body: list[str] = []
unpacked_bindings: list[Binding] = []
for i, binding in enumerate(extract_bindings(f)):
assert not isinstance(binding.argument, SelfArgument)
if isinstance(binding.argument, TensorOptionsArguments):
raise RuntimeError("VariableKernel shouldn't take TensorOptions")
is_nullable = binding.argument.type.is_nullable()
if not binding.argument.type.is_tensor_like() or is_nullable:
unpacked_bindings.append(binding)
continue
is_tensor_list = is_tensor_list_type(binding.argument.type)
ref = (not is_nullable) and not is_tensor_list
suffix = "_opt" if is_nullable and not is_tensor_list else ""
body.append(
UNPACK_TENSOR.substitute(
arg_name=binding.name,
arg_pos=i,
suffix=suffix,
ref="&" if ref else "",
)
)
unpacked_bindings.append(
Binding(
name=unpacked_name(binding.name),
nctype=binding.nctype,
argument=binding.argument,
default=binding.default,
)
)
return body, unpacked_bindings
def get_base_name(f: NativeFunction) -> str:
return f.func.name.name.base # TODO: should be str(f.func.name.name)?
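# Returns the name of the argument that the op's outputs are a view of
# (typically "self"), or None if the op is not treated as returning a view.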
def get_view_info(f: NativeFunction) -> str | None:
base_name = get_base_name(f)
view_info = VIEW_FUNCTIONS.get(base_name, None)
if view_info is None and base_name in RETURNS_VIEWS_OF_INPUT:
view_info = "self"
return view_info
def emit_view_func(
f: NativeFunction, bindings: list[Binding], view_idx: str | None = None
) -> str:
"""Generate an additional lambda function to recover views in backward when as_strided is not supported.
See Note [View + Inplace update for base tensor] and [View + Inplace update for view tensor] for more details.
"""
# TODO: Clean this logic up if we get rid of reverse view funcs or reify them.
input_base = "input_base"
replay_view_func = ""
updated_args: list[str] = []
known_view_arg_simple_types: list[CType] = [
BaseCType(longT),
OptionalCType(BaseCType(longT)),
BaseCType(SymIntT),
OptionalCType(BaseCType(SymIntT)),
BaseCType(boolT),
BaseCType(intArrayRefT),
BaseCType(symIntArrayRefT),
ConstRefCType(BaseCType(tensorT)),
ConstRefCType(OptionalCType(BaseCType(tensorT))),
]
for binding in bindings:
arg, arg_type = binding.name, binding.nctype.type
if arg == "self":
updated_args.append(input_base)
continue
if arg_type not in known_view_arg_simple_types:
known_types_str = ", ".join([str(t) for t in known_view_arg_simple_types])
raise TypeError(
f"You are adding an {arg_type} {arg} argument to op {cpp.name(f.func)} in addition to known types: "
f"{known_types_str}. Please update the list or materialize it so that it can be closed "
"over by value, also add a test in pytorch/xla/test/test_operations.py where this code "
"is exercised."
)
if arg_type == BaseCType(intArrayRefT) or arg_type == BaseCType(
symIntArrayRefT
):
# It's not safe to close over IntArrayRef by value, since this is a
# reference type, so materialize a vector to close over by value
arg_vec = arg + "_vec"
replay_view_func += ARRAYREF_TO_VEC.substitute(arg=arg, vec=arg_vec)
updated_args.append(arg_vec)
elif arg_type == OptionalCType(BaseCType(longT)):
# Materialize int64_t? to int64_t
arg_value = arg + "_val"
replay_view_func += OPTIONAL_TO_VAL.substitute(
arg=arg, val=arg_value, default="0"
)
updated_args.append(arg_value)
elif arg_type == ConstRefCType(BaseCType(tensorT)) or arg_type == ConstRefCType(
OptionalCType(BaseCType(tensorT))
):
# NB: Closing over a tensor. If a user modifies this tensor, this will be silently
# incorrect. The proper thing to do is to store the version counter and copy on write.
updated_args.append(arg)
else:
updated_args.append(arg)
from .gen_view_funcs import view_func_name
view_func_args = [b.name for b in bindings if b.name != "self"]
if view_idx is not None:
view_func_args.append(f"{view_idx}")
replay_view_func += REPLAY_VIEW_FUNC.substitute(
view_func_name=view_func_name(f, include_namespace=True),
view_func_args=view_func_args,
)
input_view = "input_view"
reverse_unpacked_args = [
"self",
f"{input_view}",
# inverse_return_mode=
"at::functionalization::InverseReturnMode::AlwaysView",
*(() if view_idx is None else (f"{view_idx}",)),
# skip input_base arg
*updated_args[1:],
]
from torchgen.api.functionalization import reverse_name
reverse_replay_view_call = REVERSE_VIEW_DISPATCH.substitute(
reverse_name=reverse_name(f, include_namespace=True),
unpacked_args=reverse_unpacked_args,
)
reverse_replay_view_func = REVERSE_REPLAY_VIEW_LAMBDA_FUNC.substitute(
input_view=input_view, reverse_replay_view_call=reverse_replay_view_call
)
is_view_with_metadata_change = (
"true" if cpp.name(f.func) in VIEW_FUNCTIONS_WITH_METADATA_CHANGE else "false"
)
return SETUP_REPLAY_VIEW_IF_NOT_SUPPORT_AS_STRIDED_OR_VIEW_WITH_METADATA_CHANGE.substitute(
is_view_with_metadata_change=is_view_with_metadata_change,
replay_view_func=replay_view_func,
reverse_replay_view_func=reverse_replay_view_func,
)
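# Builds the view-handling code for an ADInplaceOrView kernel: returns a pair
# (setup code, rhs expression), where the rhs wraps the redispatched result in
# as_view(...) with the appropriate CreationMeta; TensorList (multi-output)
# views get a per-element loop indexed by view_idx.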
def emit_view_body(
fn: NativeFunctionWithDifferentiabilityInfo, var: str
) -> tuple[str, str]:
# See NOTE [ Autograd View Variables ] in variable.h for details.
f = fn.func
base_name = get_base_name(f)
view_info = get_view_info(f)
call = ""
differentiable_outputs = gen_differentiable_outputs(fn)
differentiable_output_vars = {r.name for r in differentiable_outputs}
if not isinstance(view_info, str):
raise TypeError(
f"The view info should be a string for {base_name}, but it is: {view_info}"
)
if len(differentiable_output_vars) == 0:
# no output is differentiable (.indices() for SparseTensors for example)
rhs_value = (
f"as_view({view_info}, {var}, "
f"/* is_bw_differentiable */ false, /* is_fw_differentiable */ false)"
)
elif len(differentiable_output_vars) == 1:
# Single differentiable output (Tensor or Tensor[])
return_info = differentiable_outputs[0]
# We only support simple Tensor or a TensorList for functions that return views
if not is_tensor_type(return_info.type) and not is_tensor_list_type(
return_info.type
):
raise RuntimeError(
f"{base_name} that return differentiable views can only return Tensor or Tensor[]"
)
# See Note [ View + Inplace detection]
def get_creation_meta_in_mode(original: str) -> str:
creation_meta_with_grad_mode = f"(at::GradMode::is_enabled() ? {original} : CreationMeta::NO_GRAD_MODE)"
return f"InferenceMode::is_enabled() ? CreationMeta::INFERENCE_MODE : {creation_meta_with_grad_mode}"
# Only allow rebasing of the history if we return a single Tensor
# If we are in a no grad block, raise a warning
# See NOTE [ View + Inplace detection ] for more details about this logic
if is_tensor_list_type(return_info.type):
creation_meta = get_creation_meta_in_mode("CreationMeta::MULTI_OUTPUT_NODE")
view_idx = "view_idx"
view_func = emit_view_func(
f, extract_bindings(f), view_idx=view_idx
).strip()
as_view_call = (
f"as_view(/* base */ {view_info}, /* output */ {var}[{view_idx}], "
"/* is_bw_differentiable */ true, /* is_fw_differentiable */ true, "
"/* view_func */ std::move(func), /* rev_view_func */ rev_func, "
f"/* creation_meta */ {creation_meta});"
)
call += MULTI_OUTPUT_VIEW_ITERATION.substitute(
var=var, view_idx=view_idx, body=f"{view_func}\n{as_view_call}"
)
rhs_value = f"std::move({var})"
else:
call += emit_view_func(f, extract_bindings(f), view_idx=None)
creation_meta = get_creation_meta_in_mode("CreationMeta::DEFAULT")
rhs_value = (
f"as_view(/* base */ {view_info}, /* output */ {var}, /* is_bw_differentiable */ true, "
"/* is_fw_differentiable */ true, "
f"/* view_func */ std::move(func), /* rev_view_func */ rev_func, /* creation_meta */ {creation_meta})"
)
else:
# This could be supported but we don't need it at the moment, so keeping things simple.
raise RuntimeError(
"Function that return multiple differentiable output "
"when at least one of them is view is not supported."
)
return call, rhs_value
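# In-place and out= overloads are the ones that mutate their arguments.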
def modifies_arguments(f: NativeFunction) -> bool:
return f.func.kind() in [SchemaKind.inplace, SchemaKind.out]
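# Emits the body of an ADInplaceOrView kernel: redispatch below the
# ADInplaceOrView key, then either bump the version counters of the mutated
# outputs (in-place/out ops) or wrap the result with view tracking (view ops).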
@with_native_function_with_differentiability_info
def emit_inplace_or_view_body(fn: NativeFunctionWithDifferentiabilityInfo) -> list[str]:
f = fn.func
inplace_view_body: list[str] = []
dispatcher_sig = DispatcherSignature.from_schema(f.func)
dispatcher_exprs = dispatcher_sig.exprs()
# code-generated ADInplaceOrView kernels plumb and recompute dispatch keys directly through the kernel for performance.
# See Note [Plumbing Keys Through The Dispatcher] for details.
dispatch_key_set = "ks & c10::after_ADInplaceOrView_keyset"
redispatch_args = ", ".join([dispatch_key_set] + [a.expr for a in dispatcher_exprs])
# Note that this calls the slow, dispatching variants of manual_cpp_binding ops.
# We could probably work harder to ensure that the fast variants are called instead, but the perf benefit would be minimal.
if modifies_arguments(f): # inplace op
inplace_view_body.append(
INPLACE_REDISPATCH.substitute(
unambiguous_name=f.func.name.unambiguous_name(),
unpacked_args=redispatch_args,
)
)
for r in cpp.return_names(f):
inplace_view_body.append(f"increment_version({r});")
else:
assert get_view_info(f) is not None
inplace_view_body.append(
VIEW_REDISPATCH.substitute(
assign_return_values="auto " + TMP_VAR + " = ",
unambiguous_name=f.func.name.unambiguous_name(),
unpacked_args=redispatch_args,
)
)
call, rhs_value = emit_view_body(fn, TMP_VAR)
inplace_view_body.append(call)
assert rhs_value is not None
inplace_view_body.append(
ASSIGN_RETURN_VALUE.substitute(
return_values=tie_return_values(f), rhs_value=rhs_value
)
)
if f.func.returns:
inplace_view_body.append(f"return {get_return_value(f)};")
return inplace_view_body
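# Formats the C++ formal parameter list of a generated kernel, prepending the
# DispatchKeySet that is plumbed through the kernel.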
@with_native_function
def gen_formals(f: NativeFunction) -> str:
return ", ".join(
# code-generated autograd kernels plumb and recompute dispatch keys directly through the kernel for performance.
# See Note [Plumbing Keys Through The Dispatcher] for details.
["c10::DispatchKeySet ks"]
+ [
f'{cpp.argument_type(a, binds="__placeholder__", symint=True).cpp_type()} {a.name}'
for a in f.func.schema_order_arguments()
]
)
@with_native_function_with_differentiability_info
def inplace_or_view_method_definition(
fn: NativeFunctionWithDifferentiabilityInfo,
) -> str | None:
f = fn.func
if get_view_info(f) is None and (
# For functions that modify their inputs but don't return them,
# we can't give them autograd support.
# See https://github.com/pytorch/pytorch/issues/53796
not modifies_arguments(f)
or len(f.func.returns) == 0
):
return None
return METHOD_DEFINITION.substitute(
return_type=cpp.returns_type(f.func.returns, symint=True).cpp_type(),
type_wrapper_name=type_wrapper_name(f),
formals=gen_formals(f),
type_definition_body=emit_inplace_or_view_body(fn),
)
@with_native_function_with_differentiability_info
def inplace_or_view_method_registration(
fn: NativeFunctionWithDifferentiabilityInfo,
) -> str | None:
f = fn.func
if get_view_info(f) is None and (
not modifies_arguments(f) or len(f.func.returns) == 0
):
return None
return WRAPPER_REGISTRATION.substitute(
unqual_operator_name_with_overload=f.func.name,
type_wrapper_name=type_wrapper_name(f),
class_type="ADInplaceOrView",
)
def use_derived(fn: NativeFunctionWithDifferentiabilityInfo) -> bool:
f = fn.func
name = cpp.name(f.func)
return name not in MANUAL_AUTOGRAD and dispatch_strategy(fn) == "use_derived"
def gen_inplace_or_view_type_env(
fn: NativeFunctionWithDifferentiabilityInfo,
) -> dict[str, list[str]]:
definition = inplace_or_view_method_definition(fn)
registration = inplace_or_view_method_registration(fn)
return {
"ops_headers": (
[f"#include <ATen/ops/{fn.func.root_name}_ops.h>"]
if definition is not None
else []
),
"inplace_or_view_method_definitions": [definition]
if definition is not None
else [],
"inplace_or_view_wrapper_registrations": [registration]
if registration is not None
else [],
}
def gen_inplace_or_view_type(
out: str,
native_yaml_path: str,
tags_yaml_path: str,
fns_with_infos: list[NativeFunctionWithDifferentiabilityInfo],
template_path: str,
) -> None:
# NOTE: see Note [Sharded File] at the top of the VariableType.cpp
# template regarding sharding of the generated files.
num_shards = 2
fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
fm.write_sharded(
"ADInplaceOrViewType.cpp",
[fn for fn in fns_with_infos if use_derived(fn)],
key_fn=lambda fn: fn.func.root_name,
base_env={
"generated_comment": "@"
+ f"generated from {fm.template_dir_for_comments()}/ADInplaceOrViewType.cpp",
},
env_callable=gen_inplace_or_view_type_env,
num_shards=2,
sharded_keys={
"ops_headers",
"inplace_or_view_method_definitions",
"inplace_or_view_wrapper_registrations",
},
)

File diff suppressed because it is too large

@ -0,0 +1,536 @@
from __future__ import annotations
import itertools
from typing import Sequence
from torchgen.api import cpp
from torchgen.api.types import DispatcherSignature
from torchgen.code_template import CodeTemplate
from torchgen.context import with_native_function
from torchgen.model import Argument, NativeFunction, SchemaKind, TensorOptionsArguments
from torchgen.utils import FileManager
# Note [Manual Backend kernels]
# For these ops, we want to manually register to dispatch key Backend and
# skip codegen-ed registration to all keys before Backend.
# For codegen this means:
# - op set below must match ops with manual_kernel_registration=True in native_functions.yaml
# where we skip codegen backend kernels
# - all ops below are part of MANUAL_AUTOGRAD to skip codegen Autograd kernel registration
# - all ops below are part of MANUAL_TRACER to skip codegen Tracer kernel registration
# Note: we still register to dispatch key Profiler for these ops, keeping it untouched for now.
# You can find the manual registration in torch/csrc/autograd/VariableTypeManual.cpp
MANUAL_BACKEND = {
"options",
"data",
"set_data",
"is_leaf",
"output_nr",
"_version",
"retain_grad",
"_backward",
"requires_grad_",
}
# For these ops we want to skip the codegen-ed registration to both Autograd and Tracer keys.
# You can find the manual registration in torch/csrc/autograd/VariableTypeManual.cpp
MANUAL_AUTOGRAD_AND_TRACER = {
"resize_",
"resize_as_",
"detach",
"detach_",
"copy_",
"_fw_primal",
"_make_dual",
}
# Currently MANUAL_AUTOGRAD and MANUAL_TRACER share the same set of ops:
# union(MANUAL_BACKEND, MANUAL_AUTOGRAD_AND_TRACER)
# You can find the manual registration in torch/csrc/autograd/VariableTypeManual.cpp
MANUAL_AUTOGRAD = MANUAL_TRACER = MANUAL_BACKEND | MANUAL_AUTOGRAD_AND_TRACER
# These functions we don't want to record for tracing, because we always want
# to trace their constituent parts. This is a temporary hack in lieu
# of proper scopes, where subsequent compilation passes can ask for the unfolding
# on demand. Only concrete ATen methods can be disabled this way; it will have
# NO EFFECT otherwise.
DONT_RECORD_TRACE = {
"convolution",
"conv1d",
"conv2d",
"conv3d",
"conv_transpose1d",
"conv_transpose2d",
"conv_transpose3d",
"lstm_cell",
"gru_cell",
"rnn_tanh_cell",
"rnn_relu_cell",
# FIXME: figure out a better way when we support sparse tensors in jit
"_coalesced",
}
def should_trace(f: NativeFunction) -> bool:
# Operations involving Storage or Type are not traceable at the moment
if any(
str(arg.type) in {"Storage", "Type", "ConstQuantizerPtr"}
for arg in f.func.schema_order_arguments()
):
return False
# We can't trace functions which don't have any Tensor or TensorList returns
if not any(r.type.is_tensor_like() for r in f.func.returns):
return False
return f.func.name.name.base not in DONT_RECORD_TRACE
SELECT = CodeTemplate(
"""\
if (${cond}) {
${true}
} else {
${false}
}
"""
)
OP_NAME = CodeTemplate(
"""\
op_name = c10::Symbol::fromQualString("aten::${trace_name}");
"""
)
# These functions have their names renamed when they are recorded in the trace:
RENAME_TRACE = {
"zero": "zeros_like", # replacing aten::zero_ with aten::zeros_like
"fill": "full_like", # replacing aten::fill_ with aten::full_like
}
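# Picks the aten op name to record for the traced node; for in-place ops this
# selects between the outplace and inplace names at runtime based on
# tracer_state->force_outplace.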
def format_trace_op_name(f: NativeFunction) -> str:
# TODO: byte-for-byte compatible with old codegen behavior - should clean up
if (
f.func.kind() in (SchemaKind.functional, SchemaKind.out)
or f.func.name.name.dunder_method
):
# special case for *_out functions: the in-place and out-of-place ops
# are overloaded with the same name in the JIT
trace_name = str(f.func.name.name)
trace_name = RENAME_TRACE.get(trace_name, trace_name)
return OP_NAME.substitute(trace_name=trace_name)
# otherwise, this is an in-place op and we need to emit both in- and
# out-of-place versions
outplace_trace_name = f.func.name.name.base
inplace_trace_name = cpp.name(f.func)
outplace_trace_name = RENAME_TRACE.get(outplace_trace_name, outplace_trace_name)
inplace_trace_name = RENAME_TRACE.get(inplace_trace_name, inplace_trace_name)
return SELECT.substitute(
cond="tracer_state->force_outplace",
true=OP_NAME.substitute(trace_name=outplace_trace_name),
false=OP_NAME.substitute(trace_name=inplace_trace_name),
)
ADD_TRACE_INPUT = CodeTemplate("""jit::tracer::addInputs(node, "${name}", ${input});""")
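# Emits jit::tracer::addInputs calls for every schema argument, expanding
# TensorOptions into dtype/layout/device/pin_memory and handling the out
# arguments of *_out overloads specially (traced as regular inputs for the
# inplace variant, or as TensorOptions when the outplace variant is a factory).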
def format_trace_inputs(f: NativeFunction) -> str:
def dispatch_trace_input(arg: Argument | TensorOptionsArguments) -> Sequence[str]:
if isinstance(arg, TensorOptionsArguments):
name = "options"
return [
ADD_TRACE_INPUT.substitute(
name=name, input="c10::optTypeMetaToScalarType(options.dtype_opt())"
),
ADD_TRACE_INPUT.substitute(name=name, input="options.layout()"),
ADD_TRACE_INPUT.substitute(name=name, input="options.device()"),
ADD_TRACE_INPUT.substitute(name=name, input="options.pinned_memory()"),
]
else:
name = arg.name
if str(arg.type) == "Tensor?[]":
return [f'jit::tracer::addInputs(node, "{name}", {name});']
else:
return [ADD_TRACE_INPUT.substitute(name=name, input=name)]
args: list[Argument | TensorOptionsArguments] = list(
f.func.schema_order_arguments()
)
if f.func.is_out_fn():
# *_out functions take the result as a separate argument, but we don't want to
# trace that argument directly. Instead, we trace its TensorOptions.
# So first, we need to remove the out argument from the list of arguments to trace.
num_out_args = len(f.func.arguments.out)
args = args[:-num_out_args]
trace_inputs = itertools.chain.from_iterable(
dispatch_trace_input(arg) for arg in args
)
if f.func.is_out_fn():
# for *_out functions, handle the result argument differently for inplace/outplace.
# For inplace: just add the input to the end to conform to the JIT schema
inplace = [
ADD_TRACE_INPUT.substitute(
name=f.func.arguments.out[i].name, input=f.func.arguments.out[i].name
)
for i in range(num_out_args)
]
# for outplace: do nothing, except if the function is a factory.
# Factories are a bit special because their out-of-place overloads
# take an extra TensorOptions argument, which is missing in the _out function
has_tensor_return = any(r.type.is_tensor_like() for r in f.func.returns)
has_tensor_input_arg = any(
a.type.is_tensor_like() for a in f.func.arguments.flat_non_out
)
is_factory_method = f.category_override == "factory" or (
has_tensor_return and not has_tensor_input_arg
)
# HACK: preserve old codegen behavior - the old codegen set the `is_factory_method`
# flag for the whole family of ops with the same basename if any of them is a
# factory method. In most cases the whole family of ops are indeed all factory
# methods - 'normal' is the only exception. So we handle it specially here to avoid
# cloning the old logic.
if f.func.name.name.base == "normal":
is_factory_method = True
if is_factory_method:
outplace = [
ADD_TRACE_INPUT.substitute(
name="out",
input="c10::optTypeMetaToScalarType(out.options().dtype_opt())",
),
ADD_TRACE_INPUT.substitute(name="out", input="out.options().layout()"),
ADD_TRACE_INPUT.substitute(name="out", input="out.options().device()"),
ADD_TRACE_INPUT.substitute(
name="out", input="out.options().pinned_memory()"
),
]
else:
outplace = []
trace_inputs = itertools.chain(
trace_inputs,
[
SELECT.substitute(
cond="tracer_state->force_outplace",
true="\n".join(outplace),
false="\n".join(inplace),
)
],
)
return "\n".join(trace_inputs)
# `torch.jit.trace` has an undocumented keyword argument `_force_outplace`,
# which forces the jit to replace functions with their outplace variants (for
# example `aten::add_` becomes `aten::add`).
#
# This replacement is implemented in-place with minimal modification of the
# argument stack (it assumes that the outplace call takes the same arguments
# as the inplace version).
#
# However, there are no such substitutions available for the `aten::fill_`
# and `aten::zero_` operators, as we never implemented `aten::fill`
# and `aten::zero`. So the jit tracing hack replaces `aten::zero_` with
# `aten::zeros_like` and `aten::fill_` with `aten::full_like`.
#
# But as these can potentially take different arguments, we also have
# to hack into the stack and add the missing ones.
#
# A possible alternative would be:
#
# - Add `aten::fill` and `aten::zero`
#
# - Or keep `aten::zeros_like` arguments aligned with `aten::zero_`
# arguments (inside of the `native_functions.yaml`)
RENAME_TRACE_ADD_ARGS = {
"fill": """\
jit::tracer::addInputs(node, "options", ::std::optional<ScalarType>());
jit::tracer::addInputs(node, "options", layout_or_default(::std::nullopt));
jit::tracer::addInputs(node, "options", device_or_default(::std::nullopt));
jit::tracer::addInputs(node, "options", pinned_memory_or_default(::std::nullopt));
::std::optional<MemoryFormat> memory_format = c10::MemoryFormat::Preserve;
jit::tracer::addInputs(node, "memory_format", memory_format);
""",
"zero": """\
jit::tracer::addInputs(node, "options", ::std::optional<ScalarType>());
jit::tracer::addInputs(node, "options", layout_or_default(::std::nullopt));
jit::tracer::addInputs(node, "options", device_or_default(::std::nullopt));
jit::tracer::addInputs(node, "options", pinned_memory_or_default(::std::nullopt));
::std::optional<MemoryFormat> memory_format = c10::MemoryFormat::Preserve;
jit::tracer::addInputs(node, "memory_format", memory_format);
""",
}
INPLACE_GUARD = CodeTemplate(
"""\
jit::tracer::ensureUniqueIfOutOfPlaced("${name}", ${mutable_input});
"""
)
PRE_RECORD_TRACE = CodeTemplate(
"""\
torch::jit::Node* node = nullptr;
std::shared_ptr<jit::tracer::TracingState> tracer_state;
if (jit::tracer::isTracing()) {
tracer_state = jit::tracer::getTracingState();
at::Symbol op_name;
${set_op_name}
node = tracer_state->createNode(op_name, /*num_outputs=*/0);
jit::tracer::recordSourceLocation(node);
${add_trace_inputs}
tracer_state->insertNode(node);
${inplace_guard}
jit::tracer::setTracingState(nullptr);
}
"""
)
def format_prerecord_trace(f: NativeFunction) -> str:
if not should_trace(f):
return ""
# TODO: clean up old codegen behavior
is_inplace = (
f.func.kind() in (SchemaKind.inplace, SchemaKind.out)
and not f.func.name.name.dunder_method
)
add_args = (
RENAME_TRACE_ADD_ARGS.get(f.func.name.name.base, "") if is_inplace else ""
)
additional_inputs = (
SELECT.substitute(
cond="tracer_state->force_outplace",
true=add_args,
false="",
)
if add_args
else ""
)
return PRE_RECORD_TRACE.substitute(
set_op_name=format_trace_op_name(f),
add_trace_inputs=format_trace_inputs(f) + additional_inputs,
inplace_guard=INPLACE_GUARD.substitute(
name=cpp.name(f.func),
mutable_input=f.func.arguments.out[0].name
if f.func.arguments.out
else "self",
)
if is_inplace
else "",
)
POST_RECORD_TRACE = CodeTemplate(
"""\
if (tracer_state) {
jit::tracer::setTracingState(std::move(tracer_state));
${add_trace_outputs}
}
"""
)
def format_postrecord_trace(f: NativeFunction) -> str:
if not should_trace(f):
return ""
# For outplacing ops, *_out overloads require special handling to move the
# output *argument* to a return value
if f.func.is_out_fn():
output_names_outplace = [arg.name for arg in f.func.arguments.out]
output_names_inplace = cpp.return_names(f)
# Code size optimization: the common case is that the return value is
# the same for both variants
if output_names_outplace == output_names_inplace:
outputs = [
f"jit::tracer::addOutput(node, {n});" for n in output_names_outplace
]
return POST_RECORD_TRACE.substitute(add_trace_outputs=outputs)
selection = SELECT.substitute(
cond="force_outplace",
true="\n".join(
f"jit::tracer::addOutput(node, {n});" for n in output_names_outplace
),
false="\n".join(
f"jit::tracer::addOutput(node, {n});" for n in output_names_inplace
),
)
return POST_RECORD_TRACE.substitute(add_trace_outputs=selection)
else:
output_names = cpp.return_names(f)
outputs = [f"jit::tracer::addOutput(node, {n});" for n in output_names]
return POST_RECORD_TRACE.substitute(add_trace_outputs=outputs)
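# Helpers for binding and forwarding the redispatched return value(s), e.g.
# `auto result = ...` for a single return and a structured binding
# `auto [out0, out1] = ...` for multiple returns.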
def tie_return_values(f: NativeFunction) -> str:
if len(f.func.returns) == 1:
return f'auto {f.func.returns[0].name or "result"}'
names = cpp.return_names(f)
return f'auto [{", ".join(names)}]'
def get_return_value(f: NativeFunction) -> str:
names = cpp.return_names(f)
if len(f.func.returns) == 1:
return names[0]
if f.func.kind() == SchemaKind.out:
return f'std::forward_as_tuple({", ".join(names)})'
else:
moved = ", ".join(f"std::move({name})" for name in names)
return f"std::make_tuple({moved})"
TRACE_DISPATCH = CodeTemplate(
"""\
${assign_return_values}at::_ops::${unambiguous_name}::redispatch(${unpacked_args});"""
)
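# Assembles a TraceType kernel body: record the trace node and its inputs,
# redispatch below the Tracer key, then record the outputs and return.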
def emit_trace_body(f: NativeFunction) -> list[str]:
trace_body: list[str] = []
trace_body.append(format_prerecord_trace(f))
dispatcher_sig = DispatcherSignature.from_schema(f.func)
dispatcher_exprs = dispatcher_sig.exprs()
# code-generated tracing kernels plumb and recompute dispatch keys directly through the kernel for performance.
# See Note [Plumbing Keys Through The Dispatcher] for details.
dispatch_key_set = "ks & c10::DispatchKeySet(c10::DispatchKeySet::FULL_AFTER, c10::DispatchKey::Tracer)"
redispatch_args = ", ".join([dispatch_key_set] + [a.expr for a in dispatcher_exprs])
assign_return_values = (
f"{tie_return_values(f)} = "
if f.func.kind() in [SchemaKind.functional, SchemaKind.mutable]
and f.func.returns
else ""
)
# Note that this calls the slow, dispatching variants of manual_cpp_binding ops.
# We could probably work harder to ensure that the fast variants are
# called instead, but the perf benefit would be minimal.
trace_body.append(
TRACE_DISPATCH.substitute(
assign_return_values=assign_return_values,
unambiguous_name=f.func.name.unambiguous_name(),
unpacked_args=redispatch_args,
)
)
trace_body.append(format_postrecord_trace(f))
if f.func.returns:
trace_body.append(f"return {get_return_value(f)};")
return trace_body
METHOD_DEFINITION = CodeTemplate(
"""\
${return_type} ${type_wrapper_name}(${formals}) {
${type_definition_body}
}
"""
)
def type_wrapper_name(f: NativeFunction, key: str = "Default") -> str:
if f.func.name.overload_name:
name = f"{cpp.name(f.func)}_{f.func.name.overload_name}"
else:
name = cpp.name(f.func)
# The key argument is only used in gen_variable_type where we need fns per autograd dispatch key.
# In gen_trace_type and gen_inplace_view_type where only one fn per native_fn must be generated,
# the key argument should not be passed.
# We do not append key if it is Default so that generated functions from
# before per-dispatch-key derivatives were added retain the same names.
if key != "Default":
name = name + f"_{key}"
return name
@with_native_function
def method_definition(f: NativeFunction) -> str:
assert cpp.name(f.func) not in MANUAL_TRACER
formals = ", ".join(
# code-generated tracing kernels plumb and recompute dispatch keys directly through the kernel for performance.
# See Note [Plumbing Keys Through The Dispatcher] for details.
["c10::DispatchKeySet ks"]
+ [
f'{cpp.argument_type(a, binds="__placeholder__", symint=True).cpp_type()} {a.name}'
for a in f.func.schema_order_arguments()
]
)
return METHOD_DEFINITION.substitute(
return_type=cpp.returns_type(f.func.returns, symint=True).cpp_type(),
type_wrapper_name=type_wrapper_name(f),
formals=formals,
type_definition_body=emit_trace_body(f),
)
WRAPPER_REGISTRATION = CodeTemplate(
"""\
m.impl("${name}",
TORCH_FN(${class_type}::${type_wrapper_name})
);
"""
)
@with_native_function
def method_registration(f: NativeFunction) -> str:
assert cpp.name(f.func) not in MANUAL_TRACER
return WRAPPER_REGISTRATION.substitute(
name=f.func.name,
type_wrapper_name=type_wrapper_name(f),
class_type="TraceType",
)
def gen_trace_type_func(fn: NativeFunction) -> dict[str, list[str]]:
return {
"ops_headers": [f"#include <ATen/ops/{fn.root_name}_ops.h>"],
"trace_method_definitions": [method_definition(fn)],
"trace_wrapper_registrations": [method_registration(fn)],
}
def gen_trace_type(
out: str, native_functions: list[NativeFunction], template_path: str
) -> None:
# NOTE: see Note [Sharded File] at the top of the VariableType.cpp
# template regarding sharding of the generated files.
fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
fm.write_sharded(
"TraceType.cpp",
[fn for fn in native_functions if cpp.name(fn.func) not in MANUAL_TRACER],
key_fn=lambda fn: fn.root_name,
base_env={
"generated_comment": "@"
+ f"generated from {fm.template_dir_for_comments()}/TraceType.cpp",
},
env_callable=gen_trace_type_func,
num_shards=5,
sharded_keys={
"ops_headers",
"trace_method_definitions",
"trace_wrapper_registrations",
},
)


@ -0,0 +1,116 @@
# Generates C++ functions that wrap ATen tensor factory methods to turn them into Variables.
#
# This writes one file: variable_factories.h
from __future__ import annotations
import re
import torchgen.api.python as python
from torchgen.api import cpp
from torchgen.api.types import CppSignatureGroup
from torchgen.context import with_native_function
from torchgen.gen import parse_native_yaml
from torchgen.model import NativeFunction, TensorOptionsArguments, Variant
from torchgen.utils import FileManager, mapMaybe
OPTIONAL_TYPE_PATTERN = re.compile(r"std::optional<(.+)>")
TYPE_PATTERN = re.compile(r"(?:const\s+)?([A-Z]\w+)")
# Add 'at::' to types defined in the ATen namespace, e.g. Tensor, TensorList, IntArrayRef, etc.
# TODO: maybe update the cpp argument API to take optional namespace argument?
def fully_qualified_type(argument_type: str) -> str:
def maybe_optional_type(type: str, is_opt: bool) -> str:
return f"std::optional<{type}>" if is_opt else type
opt_match = OPTIONAL_TYPE_PATTERN.match(argument_type)
is_opt = opt_match is not None
if opt_match:
argument_type = argument_type[opt_match.start(1) : opt_match.end(1)]
match = TYPE_PATTERN.match(argument_type)
if match is None:
return maybe_optional_type(argument_type, is_opt)
index = match.start(1)
qualified_type = f"{argument_type[:index]}at::{argument_type[index:]}"
return maybe_optional_type(qualified_type, is_opt)
def gen_variable_factories(
out: str, native_yaml_path: str, tags_yaml_path: str, template_path: str
) -> None:
native_functions = parse_native_yaml(
native_yaml_path, tags_yaml_path
).native_functions
factory_functions = [fn for fn in native_functions if is_factory_function(fn)]
fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
fm.write_with_template(
"variable_factories.h",
"variable_factories.h",
lambda: {
"generated_comment": "@"
+ f"generated from {fm.template_dir_for_comments()}/variable_factories.h",
"ops_headers": [
f"#include <ATen/ops/{fn.root_name}.h>" for fn in factory_functions
],
"function_definitions": list(mapMaybe(process_function, factory_functions)),
},
)
@with_native_function
def is_factory_function(f: NativeFunction) -> bool:
if Variant.function not in f.variants:
return False
name = cpp.name(f.func)
has_tensor_options = python.has_tensor_options(f)
return has_tensor_options or name.endswith("_like")
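# Emits the variable_factories.h wrapper(s) for a factory op: one inline
# function per C++ signature (plus the SymInt signature when present) that
# strips requires_grad from the TensorOptions, calls the ATen op below the
# ADInplaceOrView key, and sets requires_grad on the resulting Variable.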
@with_native_function
def process_function(f: NativeFunction) -> str | None:
name = cpp.name(f.func)
has_tensor_options = python.has_tensor_options(f)
is_factory = has_tensor_options or name.endswith("_like")
if Variant.function not in f.variants or not is_factory:
return None
cpp_sigs = CppSignatureGroup.from_native_function(f, method=False)
sigs = [cpp_sigs.signature]
if cpp_sigs.symint_signature is not None:
sigs.append(cpp_sigs.symint_signature)
r = ""
for sig in sigs:
formals: list[str] = []
exprs: list[str] = []
requires_grad = "false"
for arg in sig.arguments():
qualified_type = fully_qualified_type(arg.type)
if arg.default:
formals.append(f"{qualified_type} {arg.name} = {arg.default}")
else:
formals.append(f"{qualified_type} {arg.name}")
if isinstance(arg.argument, TensorOptionsArguments):
# note: we remove the requires_grad setting from the TensorOptions because
# it is ignored anyway (and we actually have an assertion that it isn't set
# which would fail otherwise). We handle requires_grad explicitly here
# instead of passing it through to the kernel.
exprs.append(
f"at::TensorOptions({arg.name}).requires_grad(::std::nullopt)"
)
# Manually set the requires_grad bit on the result tensor.
requires_grad = f"{arg.name}.requires_grad()"
else:
exprs.append(arg.name)
r += f"""\
inline at::Tensor {sig.name()}({', '.join(formals)}) {{
at::AutoDispatchBelowADInplaceOrView guard;
return autograd::make_variable(at::{sig.name()}({', '.join(exprs)}), /*requires_grad=*/{requires_grad});
}}
"""
return r

File diff suppressed because it is too large

@ -0,0 +1,340 @@
# Generates ViewFuncs.h/cpp
#
# NOTE: If any changes are being made to the ViewFunc codegen please also check
# if updates are needed in torch/csrc/autograd/autograd_not_implemented_fallback.cpp
# The fallback is expected to mimic this codegen, so we should keep the two in sync.
from __future__ import annotations
from typing import TYPE_CHECKING
import torchgen.api.dispatcher as dispatcher
from torchgen.api.translate import translate
from torchgen.api.types import (
BaseCType,
Binding,
NamedCType,
SymIntT,
tensorT,
VectorCType,
)
from torchgen.code_template import CodeTemplate
from torchgen.model import Argument, NativeFunction, OptionalType
from torchgen.utils import FileManager
from .gen_inplace_or_view_type import (
CALL_DISPATCH,
extract_bindings,
get_view_info,
modifies_arguments,
use_derived,
)
if TYPE_CHECKING:
from torchgen.api.autograd import NativeFunctionWithDifferentiabilityInfo
FUNCTION_DECLARATION = CodeTemplate(
"""\
#define ${uppercase_op}_AVAILABLE
struct ${op} : public ${superclass} {
${op}(${constructor_args}) ${initializer_list}
{};
virtual ~${op}() override {};
virtual std::vector<c10::SymInt> get_symints() const override;
virtual size_t num_symints() const override;
virtual std::vector<at::Tensor> get_tensors() const override;
virtual size_t num_tensors() const override;
virtual at::Tensor operator()(const at::Tensor&) const override;
virtual std::unique_ptr<ViewFunc> clone_and_set(
std::optional<std::vector<c10::SymInt>> = ::std::nullopt,
std::optional<std::vector<at::Tensor>> = ::std::nullopt) const override;
protected:
virtual void set_symints(std::vector<c10::SymInt>) override;
virtual void set_tensors(std::vector<at::Tensor>) override;
private:
${state}
};
"""
)
FUNCTION_DEFINITION = CodeTemplate(
"""\
std::vector<c10::SymInt> ${op}::get_symints() const {
${get_symints}
}
size_t ${op}::num_symints() const {
return static_cast<size_t>(${num_symints});
}
void ${op}::set_symints(std::vector<c10::SymInt> ${symints_vec}) {
TORCH_INTERNAL_ASSERT(${symints_vec}.size() == num_symints());
${set_symints}
}
std::vector<at::Tensor> ${op}::get_tensors() const {
${get_tensors}
}
size_t ${op}::num_tensors() const {
return static_cast<size_t>(${num_tensors});
}
void ${op}::set_tensors(std::vector<at::Tensor> ${tensors_vec}) {
TORCH_INTERNAL_ASSERT(${tensors_vec}.size() == num_tensors());
${set_tensors}
}
at::Tensor ${op}::operator()(const at::Tensor& ${call_input_name}) const {
return ${op_call};
}
std::unique_ptr<ViewFunc> ${op}::clone_and_set(
std::optional<std::vector<c10::SymInt>> ${symints_vec},
std::optional<std::vector<at::Tensor>> ${tensors_vec}) const {
auto output = std::make_unique<${op}>(${clone_args});
if (${symints_vec}.has_value()) {
output->set_symints(std::move(*(${symints_vec})));
}
if (${tensors_vec}.has_value()) {
output->set_tensors(std::move(*(${tensors_vec})));
}
return output;
}
"""
)
# e.g. as_strided -> AsStridedViewFunc for camel case or
# as_strided_view_func otherwise
def view_func_name(
f: NativeFunction, include_namespace: bool = False, camel_case: bool = True
) -> str:
name = f.func.name.unambiguous_name()
view_func_name = f"{name.replace('.', '_')}_view_func"
if camel_case:
is_private = view_func_name.startswith("_")
view_func_name = "".join(
[p.title() for p in view_func_name.replace(".", "_").split("_")]
)
if is_private:
# put the leading underscore back in
view_func_name = f"_{view_func_name}"
namespace = "torch::autograd::generated::" if include_namespace else ""
return f"{namespace}{view_func_name}"
def is_symint_or_tensor(arg: Argument) -> bool:
return arg.type.is_tensor_like() or arg.type.is_symint_like()
def remove_const_ref(binding: Binding) -> Binding:
return Binding(
name=binding.name,
nctype=binding.nctype.remove_const_ref(),
argument=binding.argument,
default=binding.default,
)
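# True if the op's single return is a list of tensors, i.e. a multi-output
# view such as split or unbind.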
def returns_multi_tensor(fn: NativeFunction) -> bool:
returns = fn.func.returns
assert len(returns) == 1
returns_list_like = returns[0].type.is_list_like() is not None
returns_tensor_like = returns[0].type.is_tensor_like()
return returns_list_like and returns_tensor_like
# Generates strings with logic for getting / setting state of a particular type.
#
# Args:
# bindings (list): List of state bindings of interest (may be empty)
# state_vec_type (NamedCType): Type of vector to either return or copy from
#
# Returns:
# tuple: (list of getter logic strings, list of setter logic strings, string
# with num items expression)
def generate_state_getter_setter(
bindings: list[Binding],
state_vec_type: NamedCType,
) -> tuple[list[str], list[str], str]:
getter_logic = []
setter_logic = []
state_vec = state_vec_type.name
getter_logic.append(f"{state_vec_type.cpp_type()} {state_vec};")
if len(bindings) > 0:
setter_logic.append("auto i = 0;")
num_exprs = []
for i, b in enumerate(bindings):
assert isinstance(b.argument, Argument)
if b.argument.type.is_list_like():
# Handle list-likes.
num_expr = f"{b.name}.size()"
num_exprs.append(num_expr)
getter = f"{state_vec}.insert({state_vec}.end(), {b.name}.begin(), {b.name}.end());"
setter = f"std::copy({state_vec}.begin() + i, {state_vec}.begin() + i + {b.name}.size(), {b.name}.begin());"
elif isinstance(b.argument.type, OptionalType):
# Handle optionals.
num_expr = f"({b.name}.has_value() ? 1 : 0)"
num_exprs.append(num_expr)
conditional = f"if({b.name}.has_value())"
getter = (
f"{conditional} {state_vec}.insert({state_vec}.end(), *({b.name}));"
)
setter = f"{conditional} {b.name} = {state_vec}[i];"
else:
num_expr = "1"
num_exprs.append(num_expr)
getter = f"{state_vec}.push_back({b.name});"
setter = f"{b.name} = {state_vec}[i];"
getter_logic.append(getter)
setter_logic.append(setter)
if i < len(bindings) - 1:
setter_logic.append(f"i += {num_expr};")
# Reserve / assert based on the total number of items expression.
num_items = "0" if len(num_exprs) == 0 else " + ".join(num_exprs)
if len(bindings) > 0:
getter_logic.insert(1, f"{state_vec}.reserve({num_items});")
getter_logic.append(f"return {state_vec};")
return getter_logic, setter_logic, num_items
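# Instantiates FUNCTION_DECLARATION / FUNCTION_DEFINITION for one view op: the
# generated ViewFunc subclass stores all non-self arguments by value, exposes
# its SymInt / Tensor state through the getters and setters generated above,
# and replays the view by redispatching to the original op (indexing the result
# with view_idx for multi-output views).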
def process_function(fn: NativeFunction, template: CodeTemplate) -> str:
bindings = extract_bindings(fn)
non_self_bindings = [b for b in bindings if b.name != "self"]
non_self_args = fn.func.arguments.flat_all[1:]
non_self_value_bindings = [
dispatcher.argument(a, remove_non_owning_ref_types=True) for a in non_self_args
]
# Generate constructor / clone args for the generated struct.
constructor_args = [b.defn() for b in non_self_bindings]
clone_args = [b.name for b in non_self_bindings]
# Generate state variable declarations for the generated struct.
state_variables = [
f"{remove_const_ref(b).defn()};" for b in non_self_value_bindings
]
# Generate initializer list expressions for the generated struct.
# allow_expensive_conversions=True because we need to store e.g. SymIntArrayRefs as
# vector<SymInt>s.
init_exprs = translate(
non_self_bindings, non_self_value_bindings, allow_expensive_conversions=True
)
initializers = []
for b, init_expr in zip(non_self_bindings, init_exprs):
name = b.nctype.name
assert isinstance(name, str)
initializers.append(f"{name}({init_expr.expr})")
# Generate call to underlying view op
call_input_name = "input_base"
op_call_args = [call_input_name, *(b.name for b in non_self_bindings)]
op_call = CALL_DISPATCH.substitute(
unambiguous_name=fn.func.name.unambiguous_name(),
unpacked_args=op_call_args,
)
# Multi-output views additionally require a view_idx for disambiguation.
if returns_multi_tensor(fn):
view_idx_name = "view_idx"
view_idx_typename = "int64_t"
view_idx_decl = f"{view_idx_typename} {view_idx_name}"
constructor_args.append(view_idx_decl)
clone_args.append(view_idx_name)
state_variables.append(f"{view_idx_decl};")
initializers.append(f"{view_idx_name}({view_idx_name})")
op_call += f"[{view_idx_name}]"
# Generate initializer list for the generated struct.
initializer_list = f": {', '.join(initializers)}" if len(initializers) > 0 else ""
# Generate getter / setter logic for any symints.
symint_bindings = [
b
for b in non_self_bindings
if isinstance(b.argument, Argument) and b.argument.type.is_symint_like()
]
symints_vec_type = NamedCType("symints", VectorCType(BaseCType(SymIntT)))
get_symints, set_symints, num_symints = generate_state_getter_setter(
symint_bindings, symints_vec_type
)
# Generate getter / setter logic for any tensors.
tensor_bindings = [
b
for b in non_self_bindings
if isinstance(b.argument, Argument) and b.argument.type.is_tensor_like()
]
tensors_vec_type = NamedCType("tensors", VectorCType(BaseCType(tensorT)))
get_tensors, set_tensors, num_tensors = generate_state_getter_setter(
tensor_bindings, tensors_vec_type
)
return template.substitute(
op=view_func_name(fn),
uppercase_op=view_func_name(fn, camel_case=False).upper(),
superclass="torch::autograd::ViewFunc",
initializer_list=initializer_list,
state=state_variables,
constructor_args=constructor_args,
clone_args=clone_args,
symints_vec=symints_vec_type.name,
get_symints=get_symints,
set_symints=set_symints,
num_symints=num_symints,
tensors_vec=tensors_vec_type.name,
get_tensors=get_tensors,
set_tensors=set_tensors,
num_tensors=num_tensors,
call_input_name=call_input_name,
op_call=op_call,
)
def gen_view_funcs(
out: str,
fns_with_infos: list[NativeFunctionWithDifferentiabilityInfo],
template_path: str,
) -> None:
# don't need the info parts, just the function
fns = [fn.func for fn in fns_with_infos if use_derived(fn)]
# only want out-of-place views
view_fns = [
fn for fn in fns if get_view_info(fn) is not None and not modifies_arguments(fn)
]
declarations = [process_function(fn, FUNCTION_DECLARATION) for fn in view_fns]
definitions = [process_function(fn, FUNCTION_DEFINITION) for fn in view_fns]
ops_headers = [f"#include <ATen/ops/{fn.root_name}_ops.h>" for fn in view_fns]
file_basename = "ViewFuncs"
fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
for suffix in [".h", ".cpp"]:
fname = file_basename + suffix
fm.write_with_template(
fname,
fname,
lambda: {
"generated_comment": "@"
+ f"generated from {fm.template_dir_for_comments()}/"
+ fname,
"view_func_declarations": declarations,
"view_func_definitions": definitions,
"ops_headers": ops_headers,
},
)

File diff suppressed because it is too large

@ -0,0 +1,38 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include "torch/csrc/autograd/VariableTypeUtils.h"
#include "torch/csrc/autograd/generated/ViewFuncs.h"
#include <torch/library.h>
#include <ATen/FunctionalInverses.h>
#include <ATen/FunctionalTensorWrapper.h>
// ${generated_comment}
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
$ops_headers
#endif
using namespace at;
using torch::autograd::CreationMeta;
using torch::autograd::as_view;
using torch::autograd::increment_version;
namespace torch {
namespace ADInplaceOrView {
namespace {
${inplace_or_view_method_definitions}
} // namespace
} // namespace ADInplaceOrView
namespace {
TORCH_LIBRARY_IMPL(aten, ADInplaceOrView, m) {
${inplace_or_view_wrapper_registrations};
}
} // namespace
} // namespace torch


@ -0,0 +1,20 @@
#include "torch/csrc/autograd/FunctionsManual.h"
#include "torch/csrc/dynamo/compiled_autograd.h"
// ${generated_comment}
// The manual function definitions that used to be here are now in torch/csrc/autograd/FunctionsManual.cpp
// This speeds up re-compilation and allows sharing these implementations so that they can be
// used for forward mode AD formulas as well.
using namespace torch::autograd::generated::details;
using at::Tensor;
using at::Scalar;
using at::IntArrayRef;
using at::TensorList;
namespace torch::autograd::generated {
${autograd_function_definitions}
} // namespace torch::autograd::generated


@ -0,0 +1,51 @@
#pragma once
// ${generated_comment}
#include <ATen/ATen.h>
#include <ATen/core/functional.h>
#include <ATen/TensorGeometry.h>
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/variable.h"
#include "torch/csrc/autograd/saved_variable.h"
#include <torch/csrc/Export.h>
#include <c10/core/SymIntArrayRef.h>
namespace torch { namespace autograd { namespace generated {
using at::Scalar;
using at::Tensor;
using at::IntArrayRef;
using at::ArrayRef;
using at::Type;
using at::TensorGeometry;
using at::ScalarType;
using std::optional;
using c10::fmap;
inline std::vector<Tensor> unpack_list(at::ArrayRef<SavedVariable> xs, std::shared_ptr<Node> saved_for = nullptr) {
// NB: we must explicitly do the conversion in the lambda, otherwise template
// deduction will give a Tensor of Variable which is not convertible
return fmap(xs, [&saved_for](const SavedVariable& x) {
// TODO(crcrpar): Use `std::move(saved_for)` to avoid incrementing refcount, which would need refactoring.
return static_cast<Tensor>(x.unpack(saved_for));
});
}
inline c10::List<std::optional<Tensor>> unpack_opt_list(at::ArrayRef<SavedVariable> xs, std::shared_ptr<Node> saved_for = nullptr) {
torch::List<std::optional<Tensor>> result;
result.reserve(xs.size());
for (const SavedVariable& v : xs) {
auto var = v.unpack(saved_for);
result.push_back(var.defined() ? std::optional<Tensor>(var) : ::std::nullopt);
}
return result;
}
using torch::autograd::TypeAndSize;
${autograd_function_declarations}
}}} // namespace torch::autograd::generated


@ -0,0 +1,40 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include "torch/csrc/jit/frontend/tracer.h"
#include <torch/library.h>
#include "torch/csrc/autograd/function.h"
#include "ATen/quantized/Quantizer.h"
// ${generated_comment}
// See the `Tracer` section in `torch/csrc/jit/OVERVIEW.md`.
// NOTE See [Sharded File] comment in VariableType
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
$ops_headers
#endif
using namespace at;
namespace torch {
namespace TraceType {
namespace {
${trace_method_definitions}
} // namespace
} // namespace TraceType
namespace {
TORCH_LIBRARY_IMPL(aten, Tracer, m) {
${trace_wrapper_registrations};
}
} // namespace
} // namespace torch


@ -0,0 +1,65 @@
#include "torch/csrc/autograd/VariableTypeUtils.h"
#include "torch/csrc/autograd/generated/VariableType.h"
#include "torch/csrc/autograd/FunctionsManual.h"
#include <ATen/RedispatchFunctions.h>
#include <c10/core/impl/TorchDispatchModeTLS.h>
#include <ATen/core/TorchDispatchUtils.h>
#include <torch/library.h>
#include <ATen/SparseCsrTensorUtils.h>
// ${generated_comment}
// NOTE [Sharded File]: on this file's split-into-shards state
//
// Back in the good old days, VariableType.cpp was generated as one
// file with every function in it, and everything was great and
// simple.
//
// However, this file was also very large (over 36,000 lines), and
// compiling it was very slow, and in fact was a significant
// bottleneck for incremental rebuilds. To address this, we now
// generate the file split across multiple shards, named
// VariableType_0.cpp and so on, which can be compiled in parallel.
//
// For ease of inspection and debugging, so that it's not necessary to
// go rooting around in multiple files, we also generate all the
// functions together in VariableTypeEverything.cpp. This generated
// file is only for convenience; it's not actually used in the
// build. If the file you're looking at now is one of the shards, you
// may want to switch over to the Everything variant to make your
// grepping smoother.
using namespace at;
using namespace torch::autograd::generated;
using namespace torch::autograd::generated::details;
namespace torch::autograd {
namespace VariableType {
namespace{
C10_UNUSED void reset_grad_accumulator(Variable & self) {
AutogradMeta* meta = torch::autograd::impl::get_autograd_meta(self);
if (meta != nullptr) {
meta->grad_accumulator_.reset();
}
}
}
namespace {
${type_derived_method_definitions}
}
}
namespace {
${wrapper_registrations}
}
} // namespace torch::autograd


@ -0,0 +1,59 @@
#pragma once
// ${generated_comment}
#include <ATen/core/Tensor.h>
#include <ATen/Context.h>
#include <c10/util/intrusive_ptr.h>
#include <torch/csrc/Export.h>
#include <torch/csrc/autograd/autograd_not_implemented_fallback.h>
#include <cstdint> // for size_t
#include <functional> // for function
#include <memory> // for unique_ptr
#include <string>
#include <vector>
namespace at {
struct Quantizer;
};
namespace torch { namespace autograd {
using Variable = at::Tensor;
using at::Context;
using at::Device;
using at::Dimname;
using at::DimnameList;
using at::Generator;
using at::IntArrayRef;
using at::MemoryFormat;
using at::QScheme;
using at::Scalar;
using at::ScalarType;
using at::Storage;
using at::Tensor;
using at::TensorList;
using at::TensorOptions;
using at::Quantizer;
// This is a temporary typedef to enable Quantizer in the aten native function API;
// we'll remove it once we actually expose the Quantizer class
// to the frontend
using ConstQuantizerPtr = const c10::intrusive_ptr<Quantizer>&;
using std::optional;
namespace VariableType {
TORCH_API std::vector<at::DeprecatedTypeProperties*> allCUDATypes();
TORCH_API std::vector<at::DeprecatedTypeProperties*> allXPUTypes();
TORCH_API std::vector<at::DeprecatedTypeProperties*> allCPUTypes();
TORCH_API std::vector<at::DeprecatedTypeProperties*> allPrivateUser1Types();
at::Tensor & unpack(Tensor & t, const char * name, int pos);
const at::Tensor & unpack(const Tensor & t, const char * name, int pos);
at::Tensor unpack_opt(const Tensor & t, const char * name, int pos);
std::vector<at::Tensor> unpack(const at::ITensorListRef& tl, const char *name, int pos);
};
}} // namespace torch::autograd


@ -0,0 +1,14 @@
#include <torch/csrc/autograd/generated/ViewFuncs.h>
// ${generated_comment}
using at::Tensor;
using at::Scalar;
using at::IntArrayRef;
using at::TensorList;
namespace torch::autograd::generated {
${view_func_definitions}
} // namespace torch::autograd::generated


@ -0,0 +1,28 @@
#pragma once
// ${generated_comment}
#include <torch/library.h>
#include <torch/csrc/autograd/variable.h>
#include <c10/core/SymIntArrayRef.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
$ops_headers
#endif
namespace torch::autograd::generated {
using at::Scalar;
using at::Tensor;
using at::IntArrayRef;
using at::ArrayRef;
using at::Type;
using at::ScalarType;
using std::optional;
using c10::fmap;
${view_func_declarations}
} // namespace torch::autograd::generated

View File

@ -0,0 +1,11 @@
"""
This file is needed for generating procedural tests required for
testing __torch_function__. See tests/test_overrides.py.
"""
# flake8: noqa
import torch
annotated_args = {
${annotated_args}
}

View File

@ -0,0 +1,15 @@
#include <torch/csrc/autograd/python_enum_tag.h>
#include <torch/csrc/utils/pybind.h>
#include <pybind11/pybind11.h>
#include <ATen/core/enum_tag.h>
namespace py = pybind11;
namespace torch {
namespace autograd {
void initEnumTag(PyObject* module) {
auto m = py::handle(module).cast<py::module>();
py::enum_<at::Tag>(m, "Tag")
${enum_of_valid_tags};
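    // Note: ${enum_of_valid_tags} is filled in by codegen with one registration
    // per tag defined in tags.yaml; in Python these surface as members of
    // torch.Tag (e.g. torch.Tag.core) and can be inspected on op overloads.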
m.doc() = "An Enum that contains tags that can be assigned to an operator registered in C++.";
}
}}

View File

@ -0,0 +1,81 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include "torch/csrc/Device.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Exceptions.h"
#include "torch/csrc/autograd/python_fft_functions.h"
#include "torch/csrc/autograd/generated/python_return_types.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/autograd/utils/python_arg_parsing.h"
#include "torch/csrc/autograd/generated/variable_factories.h"
#include "torch/csrc/utils/out_types.h"
#include "torch/csrc/utils/pycfunction_helpers.h"
#include "torch/csrc/utils/python_arg_parser.h"
#include "torch/csrc/utils/structseq.h"
#include "torch/csrc/utils/device_lazy_init.h"
#include <ATen/core/Tensor.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#else
$ops_headers
#endif
using at::Tensor;
using at::Device;
using at::Layout;
using at::Scalar;
using at::ScalarType;
using at::Backend;
using at::OptionalDeviceGuard;
using at::DeviceGuard;
using at::TensorOptions;
using at::IntArrayRef;
using at::Generator;
using at::TensorList;
using at::Dimname;
using at::DimnameList;
using torch::utils::check_out_type_matches;
using namespace torch::autograd::utils;
namespace torch::autograd {
// generated forward declarations start here
${py_forwards}
static PyMethodDef fft_functions[] = {
${py_method_defs}
{NULL}
};
static PyObject* THPFFTVariableFunctionsModule = NULL;
void initFFTFunctions(PyObject* module) {
static struct PyModuleDef def = {
PyModuleDef_HEAD_INIT,
"torch._C._fft",
NULL,
-1,
fft_functions
};
PyObject* fft = PyModule_Create(&def);
THPFFTVariableFunctionsModule = fft;
if (!fft) {
throw python_error();
}
// steals a reference to fft
if (PyModule_AddObject(module, "_fft", fft) != 0) {
throw python_error();
}
}
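// Orientation sketch (assumption, not part of the template): the bindings
// registered here live on torch._C._fft, and the public torch.fft namespace
// re-exports them, so e.g. torch.fft.fft(t) ends up calling the generated
// fft_fft binding defined below.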
// generated methods start here
${py_methods}
} // namespace torch::autograd

View File

@ -0,0 +1,37 @@
#include <torch/csrc/autograd/generated/python_functions.h>
// ${generated_comment}
#include <Python.h>
#include <ATen/ATen.h>
#include <c10/core/SymNodeImpl.h>
#include "torch/csrc/autograd/generated/Functions.h"
#include "torch/csrc/autograd/python_cpp_function.h"
#include <torch/csrc/autograd/python_variable.h>
#include <torch/csrc/autograd/saved_variable.h>
#include <torch/csrc/utils/pybind.h>
#include <pybind11/pybind11.h>
// NOTE: See [Sharded File] comment in VariableType
namespace torch::autograd::generated {
template<typename C>
static void addClass(PyObject* module, PyTypeObject& type, const char* name,
PyGetSetDef* function_properties=NULL, PyMethodDef* function_methods=NULL)
{
_initFunctionPyTypeObject(type, name, function_properties, function_methods);
Py_INCREF(&type);
PyModule_AddObject(module, name, (PyObject*)&type);
registerCppFunction(typeid(C), &type);
}
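// The generated initializer below calls addClass once per autograd node type.
// Illustrative (not verbatim) expansion:
//   addClass<AddBackward0>(module, AddBackward0Class, "AddBackward0",
//                          AddBackward0_properties);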
${py_function_props_and_getters}
void initialize_autogenerated_functions${shard_id}(PyObject* module) {
${py_function_initializers}
}
} // namespace torch::autograd::generated

View File

@ -0,0 +1,17 @@
#pragma once
#include <Python.h>
// ${generated_comment}
// Python bindings for automatically generated autograd functions
namespace torch { namespace autograd { namespace generated {
${shard_forward_declare}
inline void initialize_autogenerated_functions(PyObject* module) {
${shard_call}
}
}}} // namespace torch::autograd::generated

View File

@ -0,0 +1,68 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include "torch/csrc/Device.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Exceptions.h"
#include "torch/csrc/autograd/python_linalg_functions.h"
#include "torch/csrc/autograd/generated/python_return_types.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/autograd/utils/python_arg_parsing.h"
#include "torch/csrc/utils/pycfunction_helpers.h"
#include "torch/csrc/utils/python_arg_parser.h"
#include "torch/csrc/utils/structseq.h"
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#else
$ops_headers
#endif
using at::Tensor;
using at::Scalar;
using at::ScalarType;
using at::MemoryFormat;
using at::Generator;
using at::IntArrayRef;
using at::TensorList;
using namespace torch::autograd::utils;
namespace torch::autograd {
// generated forward declarations start here
${py_forwards}
static PyMethodDef linalg_functions[] = {
${py_method_defs}
{NULL}
};
static PyObject* THPLinalgVariableFunctionsModule = NULL;
void initLinalgFunctions(PyObject* module) {
static struct PyModuleDef def = {
PyModuleDef_HEAD_INIT,
"torch._C._linalg",
NULL,
-1,
linalg_functions
};
PyObject* linalg = PyModule_Create(&def);
THPLinalgVariableFunctionsModule = linalg;
if (!linalg) {
throw python_error();
}
// steals a reference to linalg
if (PyModule_AddObject(module, "_linalg", linalg) != 0) {
throw python_error();
}
}
// generated methods start here
${py_methods}
} // namespace torch::autograd

View File

@ -0,0 +1,81 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include "torch/csrc/Device.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Exceptions.h"
#include "torch/csrc/autograd/python_nested_functions.h"
#include "torch/csrc/autograd/generated/python_return_types.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/autograd/utils/python_arg_parsing.h"
#include "torch/csrc/autograd/generated/variable_factories.h"
#include "torch/csrc/utils/out_types.h"
#include "torch/csrc/utils/pycfunction_helpers.h"
#include "torch/csrc/utils/python_arg_parser.h"
#include "torch/csrc/utils/structseq.h"
#include "torch/csrc/utils/device_lazy_init.h"
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#else
$ops_headers
#endif
using at::Tensor;
using at::Device;
using at::Layout;
using at::Scalar;
using at::ScalarType;
using at::Backend;
using at::OptionalDeviceGuard;
using at::DeviceGuard;
using at::TensorOptions;
using at::IntArrayRef;
using at::OptionalIntArrayRef;
using at::Generator;
using at::TensorList;
using at::Dimname;
using at::DimnameList;
using namespace torch::autograd::utils;
namespace torch::autograd {
// generated forward declarations start here
${py_forwards}
static PyMethodDef nested_functions[] = {
{NULL, NULL, 0, NULL},
${py_method_defs}
{NULL}
};
static PyObject* THPNestedVariableFunctionsModule = NULL;
void initNestedFunctions(PyObject* module) {
nested_functions[0] = get_nested_functions_manual()[0];
static struct PyModuleDef def = {
PyModuleDef_HEAD_INIT,
"torch._C._nested",
NULL,
-1,
nested_functions
};
PyObject* nested = PyModule_Create(&def);
THPNestedVariableFunctionsModule = nested;
if (!nested) {
throw python_error();
}
// steals a reference to nested
if (PyModule_AddObject(module, "_nested", nested) != 0) {
throw python_error();
}
}
// generated methods start here
${py_methods}
} // namespace torch::autograd

View File

@ -0,0 +1,113 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include "torch/csrc/Device.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Exceptions.h"
#include "torch/csrc/autograd/python_nn_functions.h"
#include "torch/csrc/autograd/generated/python_return_types.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/autograd/utils/python_arg_parsing.h"
#include "torch/csrc/utils/pycfunction_helpers.h"
#include "torch/csrc/utils/python_arg_parser.h"
#include "torch/csrc/utils/structseq.h"
#include "torch/csrc/utils/tensor_memoryformats.h"
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#else
$ops_headers
#endif
using at::Tensor;
using at::Scalar;
using at::MemoryFormat;
using at::Generator;
using at::IntArrayRef;
using at::ArrayRef;
using namespace torch::autograd::utils;
namespace torch::autograd {
static PyObject* THPNNVariableFunctionsModule = NULL;
static PyObject * THPVariable__parse_to(PyObject* module, PyObject* args, PyObject* kwargs)
{
HANDLE_TH_ERRORS
static PythonArgParser parser({
"to(Device device=None, ScalarType dtype=None, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
"to(ScalarType dtype, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
"to(Tensor tensor, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
});
ParsedArgs<5> parsed_args;
auto r = parser.parse(args, kwargs, parsed_args);
if (r.has_torch_function()) {
return handle_torch_function(r, args, kwargs, THPNNVariableFunctionsModule, "torch.nn", "_parse_to");
}
auto parsed = parse_to_conversion(r, /*allow_copy*/ false); // we don't want copy for nn.Module.to
auto& device = std::get<0>(parsed);
auto& scalarType = std::get<1>(parsed);
auto non_blocking = std::get<2>(parsed);
auto opt_memory_format = std::get<4>(parsed);
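  // Build the 4-tuple (device or None, dtype or None, non_blocking,
  // memory_format or None) that the Python-side nn.Module.to() helper unpacks.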
auto tuple = THPObjectPtr{PyTuple_New(4)};
if (!tuple) throw python_error();
if (device) {
PyTuple_SET_ITEM(tuple.get(), 0, THPDevice_New(*device));
} else {
Py_INCREF(Py_None);
PyTuple_SET_ITEM(tuple.get(), 0, Py_None);
}
if (scalarType) {
PyTuple_SET_ITEM(tuple.get(), 1, Py_NewRef(torch::getTHPDtype(*scalarType)));
} else {
Py_INCREF(Py_None);
PyTuple_SET_ITEM(tuple.get(), 1, Py_None);
}
PyTuple_SET_ITEM(tuple.get(), 2, torch::autograd::utils::wrap(non_blocking));
if (opt_memory_format.has_value()) {
PyTuple_SET_ITEM(tuple.get(), 3, Py_NewRef(torch::utils::getTHPMemoryFormat(opt_memory_format.value())));
} else {
Py_INCREF(Py_None);
PyTuple_SET_ITEM(tuple.get(), 3, Py_None);
}
return tuple.release();
END_HANDLE_TH_ERRORS
}
// generated forward declarations start here
${py_forwards}
static PyMethodDef nn_functions[] = {
{"_parse_to", castPyCFunctionWithKeywords(THPVariable__parse_to),
METH_VARARGS | METH_KEYWORDS, nullptr},
${py_method_defs}
{NULL}
};
void initNNFunctions(PyObject* module) {
static struct PyModuleDef def = {
PyModuleDef_HEAD_INIT,
"torch._C._nn",
NULL,
-1,
nn_functions
};
PyObject* nn = PyModule_Create(&def);
THPNNVariableFunctionsModule = nn;
if (!nn) {
throw python_error();
}
// steals a reference to nn
if (PyModule_AddObject(module, "_nn", nn) != 0) {
throw python_error();
}
}
// generated methods start here
${py_methods}
} // namespace torch::autograd

View File

@ -0,0 +1,52 @@
#include <Python.h>
#include <vector>
#include <map>
#include <string>
#include "torch/csrc/autograd/generated/python_return_types.h"
#include "torch/csrc/utils/structseq.h"
#include "torch/csrc/Exceptions.h"
namespace torch { namespace autograd { namespace generated {
${py_return_types}
}}}
namespace torch::autograd {
static void addReturnType(
PyObject* module,
const char* name,
PyTypeObject* type) {
  // hold onto the TypeObject in the unlikely case that a user
  // deletes or overrides it.
Py_INCREF(type);
if (PyModule_AddObject(
module,
name,
(PyObject*)type) != 0) {
Py_DECREF(type);
throw python_error();
}
}
void initReturnTypes(PyObject* module) {
static struct PyModuleDef def = {
PyModuleDef_HEAD_INIT, "torch._C._return_types", nullptr, -1, {}};
PyObject* return_types_module = PyModule_Create(&def);
if (!return_types_module) {
throw python_error();
}
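  // The generated registrations below add one structseq type per named-tuple
  // return, exposed in Python as e.g. torch.return_types.max with fields
  // (values, indices).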
${py_return_types_registrations}
// steals a reference to return_types on success
if (PyModule_AddObject(module, "_return_types", return_types_module) != 0) {
Py_DECREF(return_types_module);
throw python_error();
}
}
} // namespace torch::autograd

View File

@ -0,0 +1,14 @@
#pragma once
namespace torch {
namespace autograd {
namespace generated {
${py_return_types_declarations}
}
void initReturnTypes(PyObject* module);
} // namespace autograd
} // namespace torch

View File

@ -0,0 +1,67 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include "torch/csrc/Device.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Exceptions.h"
#include "torch/csrc/autograd/python_sparse_functions.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/autograd/utils/python_arg_parsing.h"
#include "torch/csrc/utils/pycfunction_helpers.h"
#include "torch/csrc/utils/python_arg_parser.h"
#include "torch/csrc/utils/structseq.h"
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#else
$ops_headers
#endif
using at::Tensor;
using at::Scalar;
using at::ScalarType;
using at::MemoryFormat;
using at::Generator;
using at::IntArrayRef;
using at::TensorList;
using namespace torch::autograd::utils;
namespace torch::autograd {
// generated forward declarations start here
${py_forwards}
static PyMethodDef sparse_functions[] = {
${py_method_defs}
{NULL}
};
static PyObject* THPSparseVariableFunctionsModule = NULL;
void initSparseFunctions(PyObject* module) {
static struct PyModuleDef def = {
PyModuleDef_HEAD_INIT,
"torch._C._sparse",
NULL,
-1,
sparse_functions
};
PyObject* sparse = PyModule_Create(&def);
THPSparseVariableFunctionsModule = sparse;
if (!sparse) {
throw python_error();
}
// steals a reference to sparse
if (PyModule_AddObject(module, "_sparse", sparse) != 0) {
throw python_error();
}
}
// generated methods start here
${py_methods}
} // namespace torch::autograd

View File

@ -0,0 +1,79 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include "torch/csrc/Device.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Exceptions.h"
#include "torch/csrc/autograd/python_special_functions.h"
#include "torch/csrc/autograd/generated/python_return_types.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/autograd/utils/python_arg_parsing.h"
#include "torch/csrc/autograd/generated/variable_factories.h"
#include "torch/csrc/utils/out_types.h"
#include "torch/csrc/utils/pycfunction_helpers.h"
#include "torch/csrc/utils/python_arg_parser.h"
#include "torch/csrc/utils/structseq.h"
#include "torch/csrc/utils/device_lazy_init.h"
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#else
$ops_headers
#endif
using at::Tensor;
using at::Device;
using at::Layout;
using at::Scalar;
using at::ScalarType;
using at::Backend;
using at::OptionalDeviceGuard;
using at::DeviceGuard;
using at::TensorOptions;
using at::IntArrayRef;
using at::Generator;
using at::TensorList;
using at::Dimname;
using at::DimnameList;
using torch::utils::check_out_type_matches;
using namespace torch::autograd::utils;
namespace torch::autograd {
// generated forward declarations start here
${py_forwards}
static PyMethodDef special_functions[] = {
${py_method_defs}
{NULL}
};
static PyObject* THPSpecialVariableFunctionsModule = NULL;
void initSpecialFunctions(PyObject* module) {
static struct PyModuleDef def = {
PyModuleDef_HEAD_INIT,
"torch._C._special",
NULL,
-1,
special_functions
};
PyObject* special = PyModule_Create(&def);
THPSpecialVariableFunctionsModule = special;
if (!special) {
throw python_error();
}
// steals a reference to special
if (PyModule_AddObject(module, "_special", special) != 0) {
throw python_error();
}
}
// generated methods start here
${py_methods}
} // namespace torch::autograd

View File

@ -0,0 +1,93 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
// Python bindings for torch.* functions implemented through ATen.
//
// The functions are bound as static methods on a class
// torch._C._VariableFunctions which is also aliased as Variable._torch
// and also copied into 'torch' module.
#include <Python.h>
// Undefine the copysign macro so that at::copysign works as intended with MSVC
// https://github.com/python/cpython/blob/c60394c7fc9cc09b16e9675a3eeb5844b6d8523f/PC/pyconfig.h#L196
#ifdef _MSC_VER
#undef copysign
#endif // _MSC_VER
#include "torch/csrc/autograd/python_torch_functions.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/Dtype.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Exceptions.h"
#include "torch/csrc/utils/out_types.h"
#include "torch/csrc/utils/pybind.h"
#include "torch/csrc/utils/pycfunction_helpers.h"
#include "torch/csrc/utils/python_arg_parser.h"
#include "torch/csrc/utils/tensor_layouts.h"
#include "torch/csrc/utils/tensor_new.h"
#include "torch/csrc/utils/tensor_numpy.h"
#include "torch/csrc/jit/frontend/tracer.h"
#include "torch/csrc/autograd/generated/variable_factories.h"
#include "torch/csrc/utils/structseq.h"
#include "torch/csrc/utils/device_lazy_init.h"
#include "torch/csrc/autograd/generated/python_return_types.h"
#include <ATen/core/Tensor.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#else
$ops_headers
#endif
#include <functional>
#include <initializer_list>
#include <stdexcept>
#include <utility>
using at::Tensor;
using at::Device;
using at::Layout;
using at::Scalar;
using at::ScalarType;
using at::Backend;
using at::OptionalDeviceGuard;
using at::DeviceGuard;
using at::TensorOptions;
using at::IntArrayRef;
using at::Generator;
using at::TensorList;
using at::Dimname;
using at::DimnameList;
using at::ArrayRef;
using torch::utils::check_out_type_matches;
using namespace torch::autograd::utils;
// NOTE: See [Sharded File] comment in VariableType
namespace torch::autograd {
// generated forward declarations start here
${py_forwards}
static PyMethodDef torch_functions_shard[] = {
${py_method_defs}
};
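// Each shard contributes its PyMethodDef entries via these gatherTorchFunctions*
// functions; the hand-written registration code concatenates all shards into
// the method table of torch._C._VariableFunctions.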
void gatherTorchFunctions${shard_id}(std::vector<PyMethodDef> &torch_functions) {
constexpr size_t num_functions = sizeof(torch_functions_shard) / sizeof(torch_functions_shard[0]);
torch_functions.insert(
torch_functions.end(),
torch_functions_shard,
torch_functions_shard + num_functions);
}
// generated methods start here
${py_methods}
} // namespace torch::autograd

View File

@ -0,0 +1,135 @@
#pragma once
// ${generated_comment}
#include <ATen/core/Tensor.h>
#include <ATen/TracerMode.h>
#include <ATen/core/grad_mode.h>
#include <c10/util/ArrayRef.h>
#include <c10/core/MemoryFormat.h>
#include <torch/csrc/api/include/torch/detail/TensorDataContainer.h>
#include <torch/csrc/autograd/variable.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#else
#include <ATen/ops/from_blob.h>
$ops_headers
#endif
#include <functional>
#include <initializer_list>
#include <utility>
namespace torch {
/// NOTE: Currently `torch::tensor(...)` doesn't support mixed data types
/// (e.g. `torch::tensor({{bool, 2.0}})` doesn't work). We might be able to
/// support it in the future by iterating over all sub-lists to find
/// the largest data type that can represent all of the elements, or by using
/// variadic templates.
///
/// NOTE: C++ `torch::tensor` with a floating-point type or an `at::ArrayRef` / `std::vector` /
/// (nested) braced-init-list of floating-point types always produces a tensor of dtype
/// `torch::get_default_dtype()`, matching Python `torch.tensor` behavior.
///
/// NOTE: C++ `torch::tensor` with an integer type or an `at::ArrayRef` / `std::vector` /
/// (nested) braced-init-list of integer types always produces a tensor of dtype `at::kLong`
/// (aka. int64_t), matching Python `torch.tensor` behavior.
///
/// NOTE: The following dtypes are not supported by `torch::tensor` currently:
/// - `unsigned int`
/// - `unsigned long int`
/// - `unsigned long long int`
/// - `long long int`
inline at::Tensor tensor(detail::TensorDataContainer tensor_data_container, const at::TensorOptions& options = {}) {
return autograd::make_variable(
// note: we remove the requires_grad setting from the TensorOptions because
  // it is ignored anyway (and we actually have an assertion that it isn't set
// which would fail otherwise). We handle requires_grad explicitly here
// instead of passing it through to the kernel.
tensor_data_container.convert_to_tensor(options.requires_grad(::std::nullopt)),
options.requires_grad());
}
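/// Illustrative usage (a sketch; dtype inference follows the notes above):
///   auto a = torch::tensor({1, 2, 3});      // integer list -> kLong
///   auto b = torch::tensor({1.0, 2.0});     // floating list -> default dtype (usually kFloat)
///   auto c = torch::tensor({1, 2}, torch::dtype(torch::kFloat64).requires_grad(true));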
/// A generic deleter function.
using Deleter = std::function<void(void*)>;
using at::MemoryFormat;
/// Exposes the given `data` as a `Tensor` without taking ownership of the
/// original data. `sizes` should specify the shape of the tensor, `strides` the
/// stride in each dimension. The `deleter` function (a
/// `std::function<void(void*)>`) will be called on the `data` when the Tensor
/// data would normally be deallocated. The `TensorOptions` specify additional
/// configuration options for the returned tensor, such as what type to
/// interpret the `data` as.
inline at::Tensor from_blob(
void* data,
at::IntArrayRef sizes,
at::IntArrayRef strides,
const Deleter& deleter,
const at::TensorOptions& options = at::TensorOptions()) {
at::Tensor tensor = ([&]() {
at::AutoDispatchBelowAutograd guard; // TODO: remove
at::tracer::impl::NoTracerDispatchMode tracer_guard;
return at::from_blob(data, sizes, strides, deleter, options.requires_grad(::std::nullopt));
})();
return autograd::make_variable(tensor, options.requires_grad());
}
/// Exposes the given `data` as a `Tensor` without taking ownership of the
/// original data. `sizes` should specify the shape of the tensor, `strides` the
/// stride in each dimension. The `TensorOptions`
/// specify additional configuration options for the returned tensor, such as
/// what type to interpret the `data` as.
inline at::Tensor from_blob(
void* data,
at::IntArrayRef sizes,
at::IntArrayRef strides,
const at::TensorOptions& options = at::TensorOptions()) {
at::Tensor tensor = ([&]() {
at::AutoDispatchBelowAutograd guard; // TODO: remove
at::tracer::impl::NoTracerDispatchMode tracer_guard;
return at::from_blob(data, sizes, strides, options.requires_grad(::std::nullopt));
})();
return autograd::make_variable(tensor, options.requires_grad());
}
/// Exposes the given `data` as a `Tensor` without taking ownership of the
/// original data. `sizes` should specify the shape of the tensor. The `deleter`
/// function (a `std::function<void(void*)>`) will be called on the `data` when
/// the Tensor data would normally be deallocated. The `TensorOptions` specify
/// additional configuration options for the returned tensor, such as what type
/// to interpret the `data` as.
inline at::Tensor from_blob(
void* data,
at::IntArrayRef sizes,
const Deleter& deleter,
const at::TensorOptions& options = at::TensorOptions()) {
at::Tensor tensor = ([&]() {
at::AutoDispatchBelowAutograd guard; // TODO: remove
at::tracer::impl::NoTracerDispatchMode tracer_guard;
return at::from_blob(data, sizes, deleter, options.requires_grad(::std::nullopt));
})();
return autograd::make_variable(tensor, options.requires_grad());
}
/// Exposes the given `data` as a `Tensor` without taking ownership of the
/// original data. `sizes` should specify the shape of the tensor. The
/// `TensorOptions` specify additional configuration options for the returned
/// tensor, such as what type to interpret the `data` as.
inline at::Tensor from_blob(
void* data,
at::IntArrayRef sizes,
const at::TensorOptions& options = at::TensorOptions()) {
at::Tensor tensor = ([&]() {
at::AutoDispatchBelowAutograd guard; // TODO: remove
at::tracer::impl::NoTracerDispatchMode tracer_guard;
return at::from_blob(data, sizes, options.requires_grad(::std::nullopt));
})();
return autograd::make_variable(tensor, options.requires_grad());
}
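/// Illustrative usage (a sketch): wrap an existing buffer without copying.
///   std::vector<float> buf(6, 1.0f);
///   auto t = torch::from_blob(buf.data(), {2, 3}, torch::dtype(torch::kFloat));
///   // `buf` must outlive `t` here, since no deleter owns the storage.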
${function_definitions}
} // namespace torch