I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@@ -0,0 +1,36 @@
#include <ATen/core/ATenOpList.h>
#include <string>
#include <cstring>
#include <utility>
#include <unordered_set>
#include <ATen/core/operator_name.h>
// ${generated_comment}
namespace at {
namespace {
struct OpNameEquals final {
bool operator()(const std::pair<const char*, const char*>& lhs, const std::pair<const char*, const char*>& rhs) const {
return 0 == strcmp(lhs.first, rhs.first) && 0 == strcmp(lhs.second, rhs.second);
}
};
struct OpNameHash final {
size_t operator()(const std::pair<const char*, const char*>& p) const {
// use std::hash<std::string> because std::hash<const char*> would hash pointers and not pointed-to strings
return std::hash<std::string>()(p.first) ^ (~ std::hash<std::string>()(p.second));
}
};
}
bool is_custom_op(const c10::OperatorName& opName) {
static std::unordered_set<std::pair<const char*, const char*>, OpNameHash, OpNameEquals> ops {
${aten_ops}
{"", ""}
};
return ops.count(std::make_pair(
opName.name.c_str(), opName.overload_name.c_str())) == 0;
}
}
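For orientation, a minimal sketch of how a caller might consult this predicate (the operator name below is hypothetical):

// Decide whether an operator falls outside the generated aten op set.
// c10::OperatorName pairs a name with an overload name, e.g. {"aten::add", "Tensor"}.
c10::OperatorName op("myns::my_op", "");
if (at::is_custom_op(op)) {
// not found in the generated {name, overload_name} set above,
// so treat it as a custom / out-of-tree operator
}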

View File

@@ -0,0 +1,73 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include <ATen/InferSize.h>
#include <ATen/Tensor.h>
#include <ATen/native/Resize.h>
#include <c10/util/irange.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
#include <ATen/ops/clone.h>
$ops_headers
#endif
namespace at {
namespace native {
// This file contains a number of kernels for aten functions that are fully code-generated.
// TODO: rename this file to something more generic.
namespace {
at::Tensor clone_arg(const at::Tensor& t) {
return t.clone();
}
std::vector<at::Tensor> clone_arg(const at::TensorList& t_list) {
std::vector<at::Tensor> out(t_list.size());
for (const auto& i : c10::irange(t_list.size())) {
out[i] = t_list[i].clone();
}
return out;
}
// duped with gen_resize_out_helper from structured kernels
void copy_arg(const at::Tensor& dst, const at::Tensor& src) {
TORCH_CHECK(src.dtype() == dst.dtype(),
"Expected out tensor to have dtype ", src.dtype(), ", but got ", dst.dtype(), " instead");
TORCH_CHECK(src.device() == dst.device(),
"Expected out tensor to have device ", src.device(), ", but got ", dst.device(), " instead");
dst.copy_(src);
}
void copy_arg(const at::TensorList& dst, const at::TensorList& src) {
TORCH_INTERNAL_ASSERT(dst.size() == src.size());
for (const auto& i : c10::irange(dst.size())) {
copy_arg(dst[i], src[i]);
}
}
// TODO: this doesn't handle restriding empty tensors correctly; see
// gen_resize_out_helper for the correct algorithm
void resize_out_helper(const at::Tensor& dst, const at::Tensor& src) {
at::native::resize_output(dst, src.sizes());
}
void resize_out_helper(const at::TensorList& dst, const at::TensorList& src) {
TORCH_INTERNAL_ASSERT(dst.size() == src.size());
for (const auto& i : c10::irange(dst.size())) {
at::native::resize_output(dst[i], src[i].sizes());
}
}
}
${CompositeViewCopyKernel_Definitions}
${GeneratedCompositeFunctional_Definitions}
${GeneratedCompositeOut_Definitions}
} // namespace native
} // namespace at

View File

@@ -0,0 +1,23 @@
#pragma once
// ${generated_comment}
// NB: The implementing C++ file is RegisterDispatchKey.cpp
// The only #includes we need are for custom classes that have defaults in the C++ API
#include <c10/core/MemoryFormat.h>
#include <c10/core/Scalar.h>
#include <ATen/core/Reduction.h>
// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>
namespace at {
namespace ${dispatch_namespace} {
${dispatch_namespaced_declarations}
} // namespace ${dispatch_namespace}
} // namespace at

View File

@@ -0,0 +1,29 @@
#include <ATen/core/TensorBody.h>
// TODO Undo all logic introduced for Note [Avoiding Include Cycles In Static Dispatch]
// Code introduced to avoid cyclic dependency in static dispatch is no longer
// needed as static dispatch logic is moved from TensorBody.h, which caused cycles in the first place,
// to Operators.cpp for supporting multiple backends with multiple kernels.
//
// Note [Avoiding Include Cycles In Static Dispatch]
// In order to avoid #include cycles in the static dispatch build, we've carefully split out
// the static function definition files into {DispatchKey}Functions.h and {DispatchKey}Functions_inl.h.
//
// Without this split, the include cycle looks like TensorBody.h -> CPUFunctions.h -> TensorBody.h.
// - TensorBody.h #includes CPUFunctions.h in the static dispatch build, because the tensor methods
// all need to call into the fastpath C++ API defined in CPUFunctions.h. The methods are also all
// directly inlined into TensorBody.h.
// - CPUFunctions.h #includes TensorBody.h because it contains function declarations for the entire C++ API,
// which include functions that have defaultable std::optional<Tensor> arguments.
// That requires knowing the full Tensor class definition.
//
// We break the cycle by doing the following:
// - Split out CPUFunctions.h into two files: CPUFunctions.h and CPUFunctions_inl.h
// - CPUFunctions.h is a dummy file that just includes the Tensor class and includes CPUFunctions_inl.h
// - CPUFunctions_inl.h includes everything else
// - (only in the static dispatch build) TensorBody.h makes sure to finish defining the Tensor class,
// and then it includes CPUFunctions_inl.h.
// - All other files that want the cpu fastpath functions can include CPUFunctions.h directly.
// - This also means that in the static dispatch build, CPUFunctions.h only needs to
// #include TensorBody.h, and it will automatically bring in CPUFunctions_inl.h.
${inline_headers}
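For concreteness, a minimal sketch of the split described in the note above, using the CPU naming (contents are illustrative only, not the real headers):

// CPUFunctions.h -- dummy aggregator, safe to include from anywhere:
#include <ATen/core/TensorBody.h>  // finish defining the Tensor class first
#include <ATen/CPUFunctions_inl.h> // then pull in the fastpath declarations

// CPUFunctions_inl.h -- everything else; relies on Tensor being fully defined:
namespace at { namespace cpu {
TORCH_API Tensor add(const Tensor& self, const Tensor& other, const Scalar& alpha);
}} // namespace at::cpu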

View File

@@ -0,0 +1,22 @@
#pragma once
// ${generated_comment}
// NB: The implementing C++ file is RegisterDispatchKey.cpp
// The only #includes we need are for custom classes that have defaults in the C++ API
#include <c10/core/MemoryFormat.h>
#include <c10/core/Scalar.h>
#include <ATen/core/Reduction.h>
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from \
<ATen/ops/{my_operator}_${dispatch_namespace}_dispatch.h>. \
See NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
${DispatchKeyFunctions_inl_includes}
${dispatch_namespaced_declarations}

View File

@@ -0,0 +1,13 @@
// ${generated_comment}
${includes}
${native_functions_include}
namespace {
${helper_fns}
} // namespace
${namespace_prologue}
${native_function_definitions}
${namespace_epilogue}

View File

@@ -0,0 +1,19 @@
#pragma once
// An external backend might generate files within its code tree
// and check all the source files within the tree with clang-format,
// so disable clang-format here since the backend might have a different config.
// clang-format off
// ${generated_comment}
#include <ATen/Tensor.h>
${namespace_prologue}
struct ${class_name} {
${dispatch_declarations}
};
${namespace_epilogue}

View File

@@ -0,0 +1,26 @@
#pragma once
// ${generated_comment}
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>
#include <ATen/TensorUtils.h>
#include <ATen/TracerMode.h>
#include <ATen/core/Generator.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
${static_dispatch_ops_headers}
${operator_includes}
namespace at {
${function_definitions}
}

View File

@@ -0,0 +1,33 @@
#pragma once
// ${generated_comment}
#include <ATen/Tensor.h>
namespace at {
namespace functionalization {
enum class InverseReturnMode {
/// Specifies that functional inverses should always return a view.
AlwaysView,
/// Specifies that functional inverses should always return a non-view / copy.
NeverView,
/// Specifies that functional inverses should return a view unless a (copying) scatter
/// inverse exists, in which case that will be used instead.
/// This avoids as_strided() calls that can be difficult for subclasses to handle.
ViewOrScatterInverse,
};
struct FunctionalInverses {
${view_inverse_declarations}
// NB: These are not generated! They're manually implemented in the template.
// TODO: Change codegen to generate these. See the following link:
// https://github.com/pytorch/pytorch/blob/main/torchgen/model.py#L2583-L2585
static at::Tensor chunk_inverse(const at::Tensor & base, const at::Tensor & mutated_view, InverseReturnMode inverse_return_mode, int64_t mutated_view_idx, int chunks, int dim);
static at::Tensor narrow_inverse(const at::Tensor & base, const at::Tensor & mutated_view, InverseReturnMode inverse_return_mode, int dim, c10::SymInt start, c10::SymInt length);
};
}
}

View File

@@ -0,0 +1,103 @@
#include <array>
#include <ATen/Functions.h>
#include <ATen/Utils.h>
#include <c10/core/Allocator.h>
namespace at {
Tensor TensorMaker::make_tensor() {
AutoDispatchBelowADInplaceOrView guard{}; // TODO: Remove.
tracer::impl::NoTracerDispatchMode tracer_guard{};
check_size_nonnegative(sizes_);
TORCH_CHECK_VALUE(
!deleter_ || !ctx_,
"The deleter and context arguments are mutually exclusive.");
if (device_ == std::nullopt) {
device_ = globalContext().getDeviceFromPtr(data_, opts_.device().type());
}
if (opts_.device().has_index()) {
// clang-format off
TORCH_CHECK_VALUE(
opts_.device() == *device_,
"Specified device ", opts_.device(), " does not match device of data ", *device_);
// clang-format on
}
std::size_t size_bytes = computeStorageSize();
DataPtr data_ptr{};
if (deleter_) {
data_ptr = makeDataPtrFromDeleter();
} else {
data_ptr = makeDataPtrFromContext();
}
TORCH_CHECK(!resizeable_ || allocator_ != nullptr, "Must specify an allocator with allocator() if you want to use resizeable_storage()");
Storage storage{Storage::use_byte_size_t{}, size_bytes, std::move(data_ptr), /*allocator=*/allocator_, /*resizable=*/resizeable_};
Tensor tensor = detail::make_tensor<TensorImpl>(
std::move(storage), opts_.computeDispatchKey(), opts_.dtype());
TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl();
if (strides_) {
tensor_impl->set_sizes_and_strides(sizes_, *strides_);
} else {
tensor_impl->set_sizes_contiguous(sizes_);
}
if (storage_offset_) {
tensor_impl->set_storage_offset(*storage_offset_);
}
return tensor;
}
std::size_t TensorMaker::computeStorageSize() const noexcept {
std::size_t itemsize = opts_.dtype().itemsize();
if (strides_) {
auto storage_size = detail::computeStorageNbytes(sizes_, *strides_, itemsize);
if (storage_offset_) {
storage_size += storage_offset_.value();
}
return storage_size;
}
std::size_t size = 1;
for (std::int64_t s : sizes_) {
size *= static_cast<std::size_t>(s);
}
auto storage_size = size * itemsize;
if (storage_offset_) {
storage_size += storage_offset_.value();
}
return storage_size;
}
inline DataPtr TensorMaker::makeDataPtrFromDeleter() noexcept {
return InefficientStdFunctionContext::makeDataPtr(data_, std::move(deleter_), *device_);
}
inline DataPtr TensorMaker::makeDataPtrFromContext() noexcept {
return DataPtr{data_, ctx_.release(), ctx_.get_deleter(), *device_};
}
IntArrayRef TensorMaker::makeTempSizes() const noexcept {
static std::int64_t zeros[5] = {0, 0, 0, 0, 0};
if (opts_.has_memory_format()) {
MemoryFormat format = *opts_.memory_format_opt();
if (format == MemoryFormat::ChannelsLast) {
return IntArrayRef(zeros, 4);
}
if (format == MemoryFormat::ChannelsLast3d) {
return IntArrayRef(zeros, 5);
}
}
return IntArrayRef(zeros, 1);
}
} // namespace at
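For context, TensorMaker is normally reached through the at::for_blob builder rather than constructed directly; a minimal usage sketch (the buffer and shape here are hypothetical):

// Wrap an existing float buffer in a Tensor without copying it.
float data[6] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
at::Tensor t = at::for_blob(data, /*sizes=*/{2, 3})
    .strides({3, 1})
    .options(at::dtype(at::kFloat))
    .make_tensor();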

View File

@@ -0,0 +1,143 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}.h> and \
see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
// NOTE: [TORCH_ASSERT_ONLY_METHOD_OPERATORS]
//
// In ATen, certain generated headers files include the definitions of
// every single operator in PyTorch. Unfortunately this means every
// time an operator signature is updated or changed in
// native_functions.yaml, you (and every other PyTorch developer) need
// to recompile every source file that includes any of these headers.
//
// To break up these header dependencies and improve incremental
// build times for all PyTorch developers, these headers are split
// into per-operator headers in the `ATen/ops` folder. This limits
// incremental builds to only changes to methods of `Tensor`, or files
// that use the specific operator being changed. With `at::sum` as an
// example, you should include
//
// <ATen/ops/sum.h> // instead of ATen/Functions.h
// <ATen/ops/sum_native.h> // instead of ATen/NativeFunctions.h
// <ATen/ops/sum_ops.h> // instead of ATen/Operators.h
// <ATen/ops/sum_cpu_dispatch.h> // instead of ATen/CPUFunctions.h
//
// However, even if you're careful to use this in your own code,
// `Functions.h` might be included indirectly through another header
// without you realising. To avoid this, you can add
//
// #define TORCH_ASSERT_ONLY_METHOD_OPERATORS
//
// to the top of your source file. This way any time the non-specific
// headers are included, the compiler will error out.
//
// Also, be aware that `ops` are not available in all build
// configurations (namely fb-internal) so you must guard these
// includes with `#ifdef AT_PER_OPERATOR_HEADERS`. e.g.
//
// #ifndef AT_PER_OPERATOR_HEADERS
// #include <ATen/Functions.h>
// #else
// #include <ATen/ops/sum.h>
// #endif
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>
#include <ATen/TensorUtils.h>
#include <ATen/TracerMode.h>
#include <ATen/core/Generator.h>
#include <ATen/core/Reduction.h>
#include <c10/core/SymInt.h>
#include <ATen/core/Tensor.h>
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/util/OptionalArrayRef.h>
#include <ATen/ops/from_blob.h>
#include <ATen/ops/tensor.h>
${Functions_includes}
namespace at {
${Functions_declarations}
// Special C++ only overloads for std()-like functions (See gh-40287)
// These are needed because int -> bool conversion takes precedence over int -> IntArrayRef
// So, for example std(0) would select the std(unbiased=False) overload
TORCH_API inline Tensor var(const Tensor& self, int dim) {
return at::var(self, IntArrayRef{dim});
}
TORCH_API inline std::tuple<Tensor, Tensor> var_mean(const Tensor& self, int dim) {
return at::var_mean(self, IntArrayRef{dim});
}
TORCH_API inline Tensor std(const Tensor& self, int dim) {
return at::std(self, IntArrayRef{dim});
}
TORCH_API inline std::tuple<Tensor, Tensor> std_mean(const Tensor& self, int dim) {
return at::std_mean(self, IntArrayRef{dim});
}
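// For example (illustrative): at::var(t, 0) binds to the var(const Tensor&, int)
// overload above and forwards to at::var(t, IntArrayRef{0}); without these
// overloads, the literal 0 would convert to bool and silently select the
// var(const Tensor&, bool unbiased) overload instead.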
inline int64_t numel(const Tensor& tensor) {
return tensor.numel();
}
inline int64_t size(const Tensor& tensor, int64_t dim) {
return tensor.size(dim);
}
inline int64_t stride(const Tensor& tensor, int64_t dim) {
return tensor.stride(dim);
}
inline bool is_complex(const Tensor& tensor) {
return tensor.is_complex();
}
inline bool is_floating_point(const Tensor& tensor) {
return tensor.is_floating_point();
}
inline bool is_signed(const Tensor& tensor) {
return tensor.is_signed();
}
inline bool is_inference(const Tensor& tensor) {
return tensor.is_inference();
}
inline bool _is_zerotensor(const Tensor& tensor) {
return tensor._is_zerotensor();
}
inline bool is_conj(const Tensor& tensor) {
return tensor.is_conj();
}
inline Tensor conj(const Tensor& tensor) {
return tensor.conj();
}
inline bool is_neg(const Tensor& tensor) {
return tensor.is_neg();
}
}

View File

@@ -0,0 +1,19 @@
#pragma once
// This file contains autogenerated LazyTensor IR nodes
${lazy_ir_sysinc}
${lazy_ir_inc}
${namespace_prologue}
using at::operator<<;
// kNullValue is used to contribute a static hash value any time
// a node has an Optional<Value> input that is nullopt. It is important
// to differentiate between HASH(std::nullopt, something) and HASH(something, std::nullopt),
// and hashing kNullValue in the position of the nullopt argument
// serves this purpose.
static const torch::lazy::Value kNullValue = torch::lazy::Value();
${ir_declarations}
${namespace_epilogue}

View File

@@ -0,0 +1,11 @@
#pragma once
${lazy_non_native_ir_inc}
// This file contains autogenerated LazyTensor Non Native IR nodes
${namespace_prologue}
${non_native_ir_nodes}
${namespace_epilogue}

View File

@@ -0,0 +1,24 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>
${MethodOperators_includes}
namespace at {
namespace _ops {
${MethodOperators_declarations}
} // namespace _ops
} // namespace at

View File

@@ -0,0 +1,17 @@
#pragma once
// ${generated_comment}
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <tuple>
#include <vector>
${extra_includes}
${native_function_declarations}

View File

@@ -0,0 +1,33 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}_native.h> \
and see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <tuple>
#include <vector>
${NativeFunctions_includes}
${NativeFunctions_declarations}

View File

@@ -0,0 +1,23 @@
#pragma once
// ${generated_comment}
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/TensorIterator.h>
#include <ATen/TensorMeta.h>
#include <tuple>
#include <vector>
namespace at {
namespace meta {
${meta_function_declarations}
} // namespace meta
} // namespace at

View File

@@ -0,0 +1,19 @@
#pragma once
// ${generated_comment}
#include <ATen/core/Tensor.h>
#include <ATen/core/IListRef.h>
#include <ATen/TensorMeta.h>
#include <ATen/TensorIterator.h>
${NativeMetaFunctions_includes}
namespace at {
namespace meta {
${NativeMetaFunctions_declarations}
} // namespace meta
} // namespace at

View File

@@ -0,0 +1,18 @@
#pragma once
// ${generated_comment}
#include <tuple>
#include <vector>
// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>
namespace at {
namespace _ops {
${declarations}
}} // namespace at::_ops

View File

@@ -0,0 +1,19 @@
#include <ATen/Tensor.h>
#include <ATen/core/dispatch/Dispatcher.h>
// ${generated_comment}
// NOTE See [Sharded File] comment in VariableType
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
${operator_headers}
#endif
${static_dispatch_extra_headers}
namespace at { namespace _ops {
${definitions}
}} // namespace at::_ops

View File

@@ -0,0 +1,74 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}_ops.h> \
and see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
#include <c10/core/SymInt.h>
#include <c10/core/SymIntArrayRef.h>
#include <c10/core/Scalar.h>
#include <c10/core/TensorOptions.h>
#include <c10/core/QScheme.h>
#include <c10/util/OptionalArrayRef.h>
#include <tuple>
#include <vector>
${Operators_includes}
// Extension writers: do you write wrapper functions? Are you frustrated with
// resolving overloads of operators? Are you frustrated with dealing with
// pointer-to-methods and resolving overloads of pointer-to-methods?? Look no
// further, this is the utility for you.
//
// Given an operator schema: aten::op.overload(...
//
// Use ATEN_FN2(op, overload) to get a *function* version of the operator
// that is guaranteed to not be overloaded. This means that you can safely
// decltype(&ATEN_FN2(op, overload)) it. NB: the 2 means this macro takes 2 args.
//
// Given an operator schema without an overload name: aten::op(...
//
// Use ATEN_FN(op) to get an unambiguous *function* version of the operator.
//
// There is some interesting behavior for out= operations.
// ATEN_FN2(sin, out) gives a function that is *faithful* to the schema;
// that is, the order of arguments is exactly what it looks like in the schema.
#define ATEN_FN2(op_name, overload) at::_ops::op_name##_##overload::call
#define ATEN_FN(op_name) at::_ops::op_name::call
// Separately, ATEN_OP(op) and ATEN_OP2(op, overload) define a class containing compile-time
// metadata about a given aten operator.
// Notable data on the class includes:
// - ATEN_OP2(add, Tensor)::name // returns the string name: "add"
// - ATEN_OP2(add, Tensor)::overload_name // returns the string overload name: "Tensor"
// - ATEN_OP2(add, Tensor)::schema // returns the C++ schema type: at::Tensor (const at::Tensor &, const at::Tensor &, const at::Scalar &)
// - ATEN_OP2(add, Tensor)::schema_str // returns the string jit type: "add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor"
#define ATEN_OP2(op_name, overload) at::_ops::op_name##_##overload
#define ATEN_OP(op_name) at::_ops::op_name
// WARNING: Please do not call any of the ops in the _ops namespace directly.
// Use the ATEN_FN macros. We do not guarantee stability of the naming
// scheme for the functions in at::_ops
// See Note [The ATen Operators API] for details of the at::_ops namespace
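// Illustrative usage (hypothetical client code, not part of this header):
//
//   // An unambiguous function for aten::add.Tensor that can be decltype'd:
//   auto* add_fn = &ATEN_FN2(add, Tensor);
//   at::Tensor r = (*add_fn)(a, b, /*alpha=*/1);
//   // Compile-time metadata from the op class:
//   const char* n = ATEN_OP2(add, Tensor)::name;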
namespace at {
namespace _ops {
${Operators_declarations}
} // namespace _ops
} // namespace at

View File

@@ -0,0 +1,15 @@
// ${generated_comment}
#include <ATen/RedispatchFunctions.h>
#include <ATen/Functions.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/core/op_registration/adaption.h>
namespace at {
namespace redispatch {
${function_redispatch_definitions}
} // namespace redispatch
} // namespace at

View File

@@ -0,0 +1,32 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_ONLY_METHOD_OPERATORS
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider using the at::_ops::{name}::redispatch() interface by including \
the specific operator from <ATen/ops/{my_operator}_ops.h>
#endif
#include <c10/core/Scalar.h>
#include <ATen/Tensor.h>
#include <c10/core/Storage.h>
#include <ATen/core/Generator.h>
#include <c10/util/Deprecated.h>
#include <ATen/DeviceGuard.h>
#include <c10/core/TensorOptions.h>
#include <ATen/core/Reduction.h>
#include <optional>
#include <ATen/TensorUtils.h>
#include <ATen/Context.h>
#include <ATen/TracerMode.h>
#include <ATen/Operators.h>
namespace at {
namespace redispatch {
${function_redispatch_definitions}
} // namespace redispatch
}

View File

@@ -0,0 +1,29 @@
// We register ops with a higher priority dispatch key (BackendSelect) than the usual backend-specific keys (e.g. CPU)
// which makes calls to the factory functions dispatch to here.
// We then 'manually' compute a lower-priority dispatch key (e.g. CPU) to re-dispatch to, which routes the call to the correct backend.
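// The generated wrappers have roughly this shape (an illustrative sketch,
// heavily simplified, not the real generated code):
//
//   at::Tensor empty_memory_format(c10::IntArrayRef size, ..., at::TensorOptions options) {
//     // derive the concrete backend key (e.g. CPU, CUDA) from dtype/layout/device
//     c10::DispatchKeySet ks(options.computeDispatchKey());
//     return at::_ops::empty_memory_format::redispatch(ks, size, ...);
//   }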
// ${generated_comment}
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/core/dispatch/DispatchKeyExtractor.h>
#include <torch/library.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
${ops_headers}
#endif
namespace at {
namespace {
${backend_select_method_definitions}
TORCH_LIBRARY_IMPL(aten, BackendSelect, m) {
${backend_select_function_registrations};
}
} // namespace
} // namespace at

View File

@@ -0,0 +1,41 @@
#include <torch/csrc/jit/runtime/operator.h>
#include <torch/csrc/jit/runtime/custom_operator.h>
#include <torch/csrc/jit/runtime/register_ops_utils.h>
#include <ATen/UnboxingFunctions.h>
// ${generated_comment}
// NOTE [Sharded File]: This file is generated in a sharded fashion to speed up
// incremental rebuilds. See the comment at the top of
// templates/VariableType.cpp for an analogous, in-depth discussion.
//
// Generated by tools/jit/gen_unboxing.py. This file registers all ATen ops into the JIT op registry instead of the c10
// dispatcher. The JIT op registry only takes boxed kernels, so we call unboxing functions in UnboxingFunctions.h
// to cast arguments into C++ types (instead of IValue) and delegate to unboxed kernels.
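// For intuition, an unboxing wrapper has roughly this shape (an illustrative
// sketch, not the generated code):
//
//   void sin_unboxed(Stack& stack) {
//     // pop the boxed IValue argument, convert it to the C++ type the kernel
//     // expects, call the unboxed at:: function, and push the result back.
//     at::Tensor self = pop(stack).toTensor();
//     push(stack, at::sin(self));
//   }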
namespace torch { namespace jit {
using autograd::Variable;
using autograd::variable_list;
using at::Scalar;
using at::ScalarType;
using at::Tensor;
using at::TensorOptions;
using at::DeviceGuard;
using ::c10::fmap;
using ::c10::filter;
namespace {
RegisterOperators reg({
// Generated operators
${unboxed_ops}
});
} // anon namespace
}} // namespace torch::jit

View File

@@ -0,0 +1,24 @@
${ns_prologue}
// NB: TORCH_LIBRARY_IMPL must be in an anonymous namespace to avoid
// ambiguity with conflicting identifiers that may have been defined in
// at namespace already.
namespace {
${dispatch_helpers}
${dispatch_anonymous_definitions}
${static_init_dispatch_registrations}
} // anonymous namespace
${deferred_dispatch_registrations}
namespace ${dispatch_namespace} {
${dispatch_namespaced_definitions}
} // namespace ${dispatch_namespace}
${ns_epilogue}

View File

@@ -0,0 +1,54 @@
// required for old g++ to compile PRId64 macros, see
// https://github.com/pytorch/pytorch/issues/3571
// for context
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
// An external backend might generate files within its code tree
// and check all the source files within the tree with clang-format,
// so disable clang-format here since the backend might have a different config.
// clang-format off
// NOTE: This condition is true for all PyTorch internal libraries; it
// just excludes external projects such as torch_xla which
// re-use some of the PyTorch codegen machinery.
#if defined(CAFFE2_BUILD_MAIN_LIB) || \
defined(TORCH_CUDA_BUILD_MAIN_LIB) || \
defined(TORCH_HIP_BUILD_MAIN_LIB) || \
defined(TORCH_CUDA_CU_BUILD_MAIN_LIB) || \
defined(TORCH_CUDA_CPP_BUILD_MAIN_LIB)
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#endif
// ${generated_comment}
#include <c10/core/TensorImpl.h>
#include <c10/core/Allocator.h>
#include <ATen/DeviceGuard.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/Utils.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/Dispatch.h>
#include <c10/util/ExclusivelyOwned.h>
#include <c10/util/Half.h>
#include <c10/core/UndefinedTensorImpl.h>
#include <optional>
#include <ATen/Tensor.h>
#include <ATen/native/Resize.h>
#include <cstddef>
#include <functional>
#include <memory>
#include <utility>
#include <ATen/Config.h>
#include <ATen/core/op_registration/adaption.h>
#include <torch/library.h>
$extra_cuda_headers
$external_backend_headers
$dispatch_headers
$ops_headers
// See template file RegisterDispatchDefinitions.ini
$dispatch_definitions

View File

@@ -0,0 +1,110 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include <ATen/core/LegacyTypeDispatch.h>
#include <ATen/EmptyTensor.h>
#include <ATen/FunctionalTensorWrapper.h>
#include <ATen/FunctionalInverses.h>
#include <ATen/MemoryOverlap.h>
#include <torch/library.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#include <ATen/NativeFunctions.h>
#else
// needed for the meta tensor calls to get stride info in functionalization
#include <ATen/ops/empty_strided_native.h>
// needed for special handling of copy_().
// See Note [functionalizating copy_() and not preserving strides]
#include <ATen/ops/to_ops.h>
#include <ATen/ops/expand_copy_ops.h>
$ops_headers
#endif
namespace at {
namespace functionalization {
// This keyset is used by functionalization when it calls into meta kernels
// to accurately propagate stride metadata.
// Exclude any modes: the purpose of calling into meta kernels is only as an implementation
// detail to perform shape inference, and we don't want any modal keys to run.
// Specifically, we want to prevent functionalization and Python modes from running.
constexpr auto exclude_keys_for_meta_dispatch =
c10::functorch_transforms_ks |
c10::DispatchKeySet({
c10::DispatchKey::FuncTorchDynamicLayerBackMode,
c10::DispatchKey::FuncTorchDynamicLayerFrontMode,
c10::DispatchKey::Python,
c10::DispatchKey::PreDispatch,
});
// Helper around at::has_internal_overlap.
// The ATen util is used in hot-path eager mode: it's always fast,
// but might return TOO_HARD sometimes.
// During functionalization, we're ok taking a bit longer
// to detect memory overlap.
inline bool has_internal_overlap_helper(const at::Tensor& t) {
auto has_overlap = at::has_internal_overlap(t);
if (has_overlap == at::MemOverlap::Yes) return true;
if (has_overlap == at::MemOverlap::No) return false;
// MemOverlap::TooHard: overlap couldn't be cheaply determined, so assume none.
return false;
}
inline Tensor to_meta(const Tensor& t) {
if (!t.defined()) return t;
return at::native::empty_strided_meta_symint(t.sym_sizes(), t.sym_strides(),
/*dtype=*/std::make_optional(t.scalar_type()), /*layout=*/std::make_optional(t.layout()),
/*device=*/std::make_optional(c10::Device(kMeta)), /*pin_memory=*/std::nullopt);
}
inline std::optional<Tensor> to_meta(const std::optional<Tensor>& t) {
if (t.has_value()) {
return std::make_optional<Tensor>(to_meta(*t));
}
return std::nullopt;
}
inline std::vector<Tensor> to_meta(at::ITensorListRef t_list) {
std::vector<Tensor> outputs;
outputs.reserve(t_list.size());
for (const auto& tensor : t_list) {
outputs.push_back(to_meta(tensor));
}
return outputs;
}
inline c10::List<Tensor> to_meta(const c10::List<Tensor>& t_list) {
c10::List<Tensor> outputs;
outputs.reserve(t_list.size());
for (const auto i : c10::irange(t_list.size())) {
outputs.push_back(to_meta(t_list[i]));
}
return outputs;
}
inline c10::List<::std::optional<Tensor>> to_meta(const c10::List<::std::optional<Tensor>>& t_list) {
c10::List<::std::optional<Tensor>> outputs;
outputs.reserve(t_list.size());
for (const auto i : c10::irange(t_list.size())) {
outputs.push_back(to_meta(t_list[i]));
}
return outputs;
}
${func_definitions}
} // namespace functionalization
namespace {
TORCH_LIBRARY_IMPL(aten, Functionalize, m) {
${func_registrations};
}
} // namespace
} // namespace at

View File

@@ -0,0 +1,13 @@
// ${generated_comment}
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <torch/library.h>
namespace at {
TORCH_LIBRARY(aten, m) {
${aten_schema_registrations};
// Distributed Ops
// Implementations located in torch/csrc/jit/runtime/register_distributed_ops.cpp
m.def("get_gradients(int context_id) -> Dict(Tensor, Tensor)");
}
${schema_registrations}
} // namespace at

View File

@@ -0,0 +1,4 @@
// This file contains all native_functions that can be registered to
// and the schema string that they should be registered with
${registration_declarations}

View File

@@ -0,0 +1,753 @@
#pragma once
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#include <c10/core/Device.h>
#include <c10/core/Layout.h>
#include <c10/core/MemoryFormat.h>
#include <c10/core/QScheme.h>
#include <c10/core/Stream.h>
#include <c10/core/Scalar.h>
#include <c10/core/ScalarType.h>
#include <c10/core/ScalarTypeToTypeMeta.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorImpl.h>
#include <c10/core/UndefinedTensorImpl.h>
#include <c10/core/WrapDimMinimal.h>
#include <c10/util/Exception.h>
#include <c10/util/ExclusivelyOwned.h>
#include <c10/util/Deprecated.h>
#include <c10/util/MaybeOwned.h>
#include <optional>
#include <c10/util/OptionalArrayRef.h>
#include <c10/util/intrusive_ptr.h>
#include <c10/macros/Export.h>
#include <ATen/core/CheckMemoryFormat.h>
#include <ATen/core/DeprecatedTypePropertiesRegistry.h>
#include <ATen/core/DeprecatedTypeProperties.h>
#include <ATen/core/NamedTensor.h>
#include <ATen/core/QuantizerBase.h>
#include <c10/core/SymInt.h>
#include <ATen/core/TensorAccessor.h>
#include <ATen/core/TensorBase.h>
#include <ATen/MethodOperators.h>
namespace c10 {
template<class T> class List;
template<class T> class IListRef;
}
namespace at {
struct Generator;
struct Type;
class DeprecatedTypeProperties;
class Tensor;
} // namespace at
namespace at {
namespace indexing {
struct TensorIndex;
} // namespace indexing
} // namespace at
namespace torch { namespace autograd {
struct Node;
}} // namespace torch::autograd
namespace at {
class OptionalTensorRef;
class TensorRef;
class Tensor;
using TensorList = ArrayRef<Tensor>;
using ITensorList = c10::IListRef<Tensor>;
using Stream = c10::Stream;
// Tensor is a "generic" object holding a pointer to the underlying TensorImpl object, which
// has an embedded reference count. In this way, Tensor is similar to boost::intrusive_ptr.
//
// For example:
//
// void func(Tensor a) {
// Tensor b = a;
// ...
// }
//
// In this example, when we say Tensor b = a, we are creating a new object that points to the
// same underlying TensorImpl, and bumps its reference count. When b goes out of scope, the
// destructor decrements the reference count by calling release() on the TensorImpl it points to.
// The existing constructors, operator overloads, etc. take care to implement the correct semantics.
//
// Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and
// special care must be taken to handle this.
class TORCH_API Tensor: public TensorBase {
protected:
// Create a Tensor with a +0 reference count. Special care must be
// taken to avoid decrementing this reference count at destruction
// time. Intended to support MaybeOwnedTraits<Tensor>.
explicit Tensor(unsafe_borrow_t, const TensorBase& rhs): TensorBase(unsafe_borrow_t{}, rhs) {}
friend MaybeOwnedTraits<Tensor>;
friend OptionalTensorRef;
friend TensorRef;
public:
Tensor() = default;
// This constructor should not be used by end users and is an implementation
// detail invoked by autogenerated code.
explicit Tensor(
c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
: TensorBase(std::move(tensor_impl)) {}
Tensor(const Tensor &tensor) = default;
Tensor(Tensor &&tensor) = default;
// Implicitly move-constructible from TensorBase, but must be explicit to increase refcount
explicit Tensor(const TensorBase &base): TensorBase(base) {}
/*implicit*/ Tensor(TensorBase &&base): TensorBase(std::move(base)) {}
// Creates a new wrapper from TensorImpl. Intentionally a free method because
// it should be used with care. Checks necessary invariants
static Tensor wrap_tensor_impl(
c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl) {
return TensorBase::wrap_tensor_impl(std::move(tensor_impl));
}
Tensor contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const {
return TensorBase::contiguous(memory_format);
}
Tensor conj() const {
if (!this->is_complex()) {
return *this;
}
switch (this->layout()) {
case at::kSparse:
case at::kSparseCsr:
case at::kSparseCsc:
case at::kSparseBsr:
case at::kSparseBsc:
return this->conj_physical();
default:
return this->_conj();
}
}
// Aliased by Dimname overloads, so need explicit using
using TensorBase::size;
using TensorBase::sym_size;
using TensorBase::stride;
/// Should be used if *this can reasonably be expected to be contiguous and
/// performance is important.
/// Compared to contiguous, it saves a reference count
/// increment/decrement if *this is already contiguous, at the cost
/// in all cases of an extra pointer of stack usage, an extra branch
/// to access, and an extra branch at destruction time.
c10::MaybeOwned<Tensor> expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const &;
// Use .contiguous() instead. Trying to borrow from a prvalue Tensor
// will only lead to trouble and dangling references.
c10::MaybeOwned<Tensor> expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) && = delete;
// The following overloads are very intriguing. Consider the following
// program:
//
// x[1] = 3;
//
// We would expect that the first entry of x is written to 3. But how can we
// actually achieve this? x[1] evaluates to a tensor...
//
// The answer is, using a ref-qualifier. x[1] is an rvalue, which cannot be
// (profitably) assigned to in the traditional sense, so we overload
// assignment to mean, "Actually, copy 3 into the tensor data." This is done
// with an rvalue-reference ref-qualified overload (the methods with && at the
// end of their type.)
//
// There's one more fly in the ointment: We also want
//
// Tensor x = y;
//
// to work, and we want it NOT to copy. So we need a traditional operator=
// overload. But we MUST specify a mutable lvalue ref-qualifier, to
// disambiguate the traditional overload from the rvalue-reference
// ref-qualified overload. Otherwise, it will be ambiguous, because
// a non ref-qualified method is eligible for all situations.
// Unfortunately, we have to write these constructors out manually
// to work around an MSVC bug:
// error C2580: 'at::Tensor &at::Tensor::operator =(const at::Tensor &) &':
// multiple versions of a defaulted special member functions are not allowed
// Tensor& operator=(const Tensor&) & = default;
// Tensor& operator=(Tensor&&) & = default;
// Also MSVC will wrongly issue the following warning with the aforementioned fix
// warning C4522: 'at::Tensor': multiple assignment operators specified
// Let's just skip the warning.
//
// TODO: temporarily disabled
Tensor& operator=(const TensorBase& x) & {
impl_ = x.getIntrusivePtr();
return *this;
}
Tensor& operator=(TensorBase&& x) & noexcept {
impl_ = x.unsafeReleaseIntrusivePtr();
return *this;
}
Tensor& operator=(const Tensor &x) & {
return operator=(static_cast<const TensorBase&>(x));
}
Tensor& operator=(Tensor &&x) & noexcept {
return operator=(static_cast<TensorBase&&>(x));
}
Tensor& operator=(const Scalar &v) && {
return fill_(v);
}
Tensor& operator=(const Tensor &rhs) && {
return copy_(rhs);
}
Tensor& operator=(Tensor&& rhs) && {
return copy_(rhs);
}
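// Illustrative semantics of the ref-qualified overloads above (hypothetical snippet):
//
//   at::Tensor x = at::zeros({3});
//   x[1] = 3;  // x[1] is an rvalue Tensor: operator=(const Scalar &) &&
//              // writes 3 into the underlying data
//   at::Tensor y;
//   y = x;     // y is an lvalue: operator=(const Tensor &) & rebinds y
//              // to share x's TensorImpl, no data copy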
C10_DEPRECATED_MESSAGE("Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device().")
DeprecatedTypeProperties & type() const {
return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
dispatchKeyToBackend(legacyExtractDispatchKey(key_set())),
scalar_type());
}
Tensor toType(ScalarType t) const {
return to(options().dtype(t), /*non_blocking*/ false, /*copy*/ false);
}
// TODO: Deprecate me
Tensor toBackend(Backend b) const {
return to(options().device(backendToDeviceType(b)).layout(layout_from_backend(b)), /*non_blocking*/ false, /*copy*/ false);
}
C10_DEPRECATED_MESSAGE("Tensor.is_variable() is deprecated; everything is a variable now. (If you want to assert that variable has been appropriately handled already, use at::impl::variable_excluded_from_dispatch())")
bool is_variable() const noexcept {
return !at::impl::variable_excluded_from_dispatch();
}
template<typename T>
C10_DEPRECATED_MESSAGE("Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead.")
T * data() const {
return data_ptr<T>();
}
template <typename T>
T item() const;
template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead")
GenericPackedTensorAccessor<T,N,PtrTraits,index_t> packed_accessor() const & {
return generic_packed_accessor<T,N,PtrTraits,index_t>();
}
template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead")
GenericPackedTensorAccessor<T,N,PtrTraits,index_t> packed_accessor() && = delete;
Tensor operator~() const {
return bitwise_not();
}
Tensor operator-() const {
return neg();
}
Tensor& operator+=(const Tensor & other) {
return add_(other);
}
Tensor& operator+=(const Scalar & other) {
return add_(other);
}
Tensor& operator-=(const Tensor & other) {
return sub_(other);
}
Tensor& operator-=(const Scalar & other) {
return sub_(other);
}
Tensor& operator*=(const Tensor & other) {
return mul_(other);
}
Tensor& operator*=(const Scalar & other) {
return mul_(other);
}
Tensor& operator/=(const Tensor & other) {
return div_(other);
}
Tensor& operator/=(const Scalar & other) {
return div_(other);
}
Tensor& operator&=(const Tensor & other) {
return bitwise_and_(other);
}
Tensor& operator|=(const Tensor & other) {
return bitwise_or_(other);
}
Tensor& operator^=(const Tensor & other) {
return bitwise_xor_(other);
}
Tensor operator[](const Scalar & index) const {
if (!index.isIntegral(false)) {
TORCH_CHECK_INDEX(false, "Can only index tensors with integral scalars");
}
return this->operator[](index.toLong());
}
Tensor operator[](const Tensor & index) const {
// These properties are checked in the Scalar constructor, but we check them
// here first to provide more useful diagnostics for the user.
if (!index.defined()) {
TORCH_CHECK_INDEX(false, "Can only index with tensors that are defined");
}
if (index.dim() != 0) {
TORCH_CHECK_INDEX(false,
"Can only index with tensors that are scalars (zero-dim)");
}
// The Scalar(Tensor) constructor is explicit, so we need to call it.
return this->operator[](index.item());
}
Tensor operator[](int64_t index) const {
return select(0, index);
}
Tensor index(ArrayRef<at::indexing::TensorIndex> indices) const;
Tensor index(std::initializer_list<at::indexing::TensorIndex> indices) const;
Tensor & index_put_(ArrayRef<at::indexing::TensorIndex> indices, Tensor const & rhs);
Tensor & index_put_(ArrayRef<at::indexing::TensorIndex> indices, const Scalar& v);
Tensor & index_put_(std::initializer_list<at::indexing::TensorIndex> indices, Tensor const & rhs);
Tensor & index_put_(std::initializer_list<at::indexing::TensorIndex> indices, const Scalar& v);
Tensor cpu() const {
return to(options().device(c10::DeviceType::CPU), /*non_blocking*/ false, /*copy*/ false);
}
// TODO: The Python version also accepts arguments
Tensor cuda() const {
return to(options().device(c10::DeviceType::CUDA), /*non_blocking*/ false, /*copy*/ false);
}
Tensor hip() const {
return to(options().device(c10::DeviceType::HIP), /*non_blocking*/ false, /*copy*/ false);
}
Tensor ve() const {
return to(options().device(c10::DeviceType::VE), /*non_blocking*/ false, /*copy*/ false);
}
Tensor vulkan() const {
return to(options().device(c10::DeviceType::Vulkan), /*non_blocking*/ false, /*copy*/ false);
}
Tensor metal() const {
return to(options().device(c10::DeviceType::Metal), /*non_blocking*/ false, /*copy*/ false);
}
Tensor meta() const {
return to(options().device(c10::DeviceType::Meta), /*non_blocking*/ false, /*copy*/ false);
}
// ~~~~~ Autograd API ~~~~~
/// \fn bool is_leaf() const;
///
/// All Tensors that have `requires_grad()` which is ``false`` will be leaf Tensors by convention.
///
/// For Tensors that have `requires_grad()` which is ``true``, they will be leaf Tensors if they were
/// created by the user. This means that they are not the result of an operation and so
/// `grad_fn()` is `nullptr`.
///
/// Only leaf Tensors will have their `grad()` populated during a call to `backward()`.
/// To get `grad()` populated for non-leaf Tensors, you can use `retain_grad()`.
///
/// Example:
/// @code
/// auto a = torch::rand(10, torch::requires_grad());
/// std::cout << a.is_leaf() << std::endl; // prints `true`
///
/// auto b = torch::rand(10, torch::requires_grad()).to(torch::kCUDA);
/// std::cout << b.is_leaf() << std::endl; // prints `false`
/// // b was created by the operation that cast a cpu Tensor into a cuda Tensor
///
/// auto c = torch::rand(10, torch::requires_grad()) + 2;
/// std::cout << c.is_leaf() << std::endl; // prints `false`
/// // c was created by the addition operation
///
/// auto d = torch::rand(10).cuda();
/// std::cout << d.is_leaf() << std::endl; // prints `true`
/// // d does not require gradients and so has no operation creating it (that is tracked by the autograd engine)
///
/// auto e = torch::rand(10).cuda().requires_grad_();
/// std::cout << e.is_leaf() << std::endl; // prints `true`
/// // e requires gradients and has no operations creating it
///
/// auto f = torch::rand(10, torch::device(torch::kCUDA).requires_grad(true));
/// std::cout << f.is_leaf() << std::endl; // prints `true`
/// // f requires grad, has no operation creating it
/// @endcode
/// \fn void backward(const Tensor & gradient={}, std::optional<bool> retain_graph=std::nullopt, bool create_graph=false, std::optional<TensorList> inputs=std::nullopt) const;
///
/// Computes the gradient of current tensor with respect to graph leaves.
///
/// The graph is differentiated using the chain rule. If the tensor is
/// non-scalar (i.e. its data has more than one element) and requires
/// gradient, the function additionally requires specifying ``gradient``.
/// It should be a tensor of matching type and location, that contains
/// the gradient of the differentiated function w.r.t. this Tensor.
///
/// This function accumulates gradients in the leaves - you might need to
/// zero them before calling it.
///
/// \param gradient Gradient w.r.t. the
/// tensor. If it is a tensor, it will be automatically converted
/// to a Tensor that does not require grad unless ``create_graph`` is True.
/// None values can be specified for scalar Tensors or ones that
/// don't require grad. If a None value would be acceptable then
/// this argument is optional.
/// \param retain_graph If ``false``, the graph used to compute
/// the grads will be freed. Note that in nearly all cases setting
/// this option to True is not needed and often can be worked around
/// in a much more efficient way. Defaults to the value of
/// ``create_graph``.
/// \param create_graph If ``true``, graph of the derivative will
/// be constructed, allowing computation of higher order derivative
/// products. Defaults to ``false``.
/// \param inputs Inputs w.r.t. which the gradient will be accumulated into
/// ``at::Tensor::grad``. All other Tensors will be ignored. If not
/// provided, the gradient is accumulated into all the leaf Tensors
/// that were used to compute the current tensor.
/// When inputs are provided and a given input is not a leaf,
/// the current implementation will call its grad_fn (even though it is not strictly needed to get these gradients).
/// It is an implementation detail on which the user should not rely.
/// See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.
void backward(const Tensor & gradient={}, std::optional<bool> retain_graph=std::nullopt, bool create_graph=false, std::optional<TensorList> inputs=std::nullopt) const {
// NB: Adding this wrapper to _backward here because we'd like our
// 'backwards' api to accept the 'inputs' argument optionally. Since code gen
// currently does not support optional of TensorList our approach is to replace
// backward in native_functions.yaml with _backward and call it here instead.
if (inputs.has_value()) {
TORCH_CHECK(inputs.value().size() > 0, "'inputs' argument to backward cannot be empty");
this->_backward(inputs.value(), gradient, retain_graph, create_graph);
} else {
this->_backward({}, gradient, retain_graph, create_graph);
}
}
/// \fn Tensor detach() const;
///
/// Returns a new Tensor, detached from the current graph.
/// The result will never require gradient.
/// \fn Tensor & detach_() const;
///
/// Detaches the Tensor from the graph that created it, making it a leaf.
/// Views cannot be detached in-place.
/// \fn void retain_grad() const;
///
/// Enables this Tensor to have its :attr:`grad` populated during
/// :func:`backward`. This is a no-op for leaf tensors.
/// \fn bool retains_grad() const;
///
/// Is ``true`` if this Tensor is non-leaf and its :attr:`grad` is enabled to be
/// populated during :func:`backward`, ``false`` otherwise.
const Tensor& set_requires_grad(bool requires_grad) const {
TensorBase::set_requires_grad(requires_grad);
return *this;
}
/// Return a mutable reference to the gradient. This is conventionally
/// used as `t.grad() = x` to set a gradient to a completely new tensor.
/// Note that this function works with a non-const Tensor and is not
/// thread safe.
Tensor& mutable_grad() const {
return impl_->mutable_grad();
}
/// This function returns an undefined tensor by default and returns a defined tensor
/// the first time a call to `backward()` computes gradients for this Tensor.
/// The attribute will then contain the gradients computed and future calls
/// to `backward()` will accumulate (add) gradients into it.
const Tensor& grad() const {
const Tensor& maybe_grad = impl_->grad();
if (!is_leaf() && !retains_grad() && !maybe_grad.defined()) {
TORCH_WARN(
"The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad "
"attribute won't be populated during autograd.backward(). If you indeed want the .grad "
"field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. "
"If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor "
"instead. See github.com/pytorch/pytorch/pull/30531 for more informations.");
}
return maybe_grad;
}
// The Forward AD API functions below are low level and are not to be used by end
// users who should use the API provided in torch/csrc/autograd.h
/// This function returns the forward gradient for this Tensor at the given level.
const Tensor& _fw_grad(uint64_t level) const {
return impl_->_fw_grad(level, *this);
}
/// This function can be used to set the value of the forward grad.
/// Note that the given new_grad might not be used directly if it has different
/// metadata (size/stride/storage offset) compared to this Tensor. In that case,
/// new_grad content will be copied into a new Tensor
void _set_fw_grad(const TensorBase& new_grad, uint64_t level, bool is_inplace_op) const {
impl_->_set_fw_grad(new_grad, *this, level, is_inplace_op);
}
// STOP. Thinking of adding a method here, which only makes use
// of other ATen methods? Define it in native_functions.yaml.
// Example:
// Tensor * add(Tensor & b);
${tensor_method_declarations}
// Special C++ only overloads for std()-like functions (See gh-40287)
// These are needed because int -> bool conversion takes precedence over int -> IntArrayRef
// So, for example std(0) would select the std(unbiased=False) overload
Tensor var(int dim) const {
return var(IntArrayRef{dim});
}
Tensor std(int dim) const {
return std(IntArrayRef{dim});
}
// We changed .dtype() to return a TypeMeta in #12766. Ideally, we want the
// at::kDouble and its friends to be TypeMeta's, but that hasn't happened yet.
// Before that change, we make this method to maintain BC for C++ usage like
// `x.to(y.dtype)`.
// TODO: remove following two after at::kDouble and its friends are TypeMeta's.
inline Tensor to(caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
return this->to(/*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
}
inline Tensor to(Device device, caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
return this->to(device, /*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
}
template <typename F, typename... Args>
decltype(auto) m(F func, Args&&... params) const {
return func(*this, std::forward<Args>(params)...);
}
/// NOTE: This is similar to the legacy `.data()` function on `Variable`, and is intended
/// to be used from functions that need to access the `Variable`'s equivalent `Tensor`
/// (i.e. `Tensor` that shares the same storage and tensor metadata with the `Variable`).
///
/// One notable difference with the legacy `.data()` function is that changes to the
/// returned `Tensor`'s tensor metadata (e.g. sizes / strides / storage / storage_offset)
/// will not update the original `Variable`, due to the fact that this function
/// shallow-copies the `Variable`'s underlying TensorImpl.
at::Tensor tensor_data() const {
return TensorBase::tensor_data();
}
/// NOTE: `var.variable_data()` in C++ has the same semantics as `tensor.data`
/// in Python, which create a new `Variable` that shares the same storage and
/// tensor metadata with the original `Variable`, but with a completely new
/// autograd history.
///
/// NOTE: If we change the tensor metadata (e.g. sizes / strides /
/// storage / storage_offset) of a variable created from `var.variable_data()`, those
/// changes will not update the original variable `var`. In `.variable_data()`, we set
/// `allow_tensor_metadata_change_` to false to make such changes explicitly illegal,
/// in order to prevent users from changing metadata of `var.variable_data()`
/// and expecting the original variable `var` to also be updated.
at::Tensor variable_data() const {
return TensorBase::variable_data();
}
// Hooks
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
template <typename T>
using hook_return_void_t = std::enable_if_t<std::is_void<typename std::invoke_result_t<T&, Tensor>>::value, unsigned>;
template <typename T>
using hook_return_var_t = std::enable_if_t<std::is_same<typename std::invoke_result_t<T&, Tensor>, Tensor>::value, unsigned>;
/// Registers a backward hook.
///
/// The hook will be called every time a gradient with respect to the Tensor is computed.
/// The hook should have one of the following signatures:
/// ```
/// hook(Tensor grad) -> Tensor
/// ```
/// ```
/// hook(Tensor grad) -> void
/// ```
/// The hook should not modify its argument, but it can optionally return a new gradient
/// which will be used in place of `grad`.
///
/// This function returns the index of the hook in the list, which can be used to remove the hook.
///
/// Example:
/// @code
/// auto v = torch::tensor({0., 0., 0.}, torch::requires_grad());
/// auto h = v.register_hook([](torch::Tensor grad){ return grad * 2; }); // double the gradient
/// v.backward(torch::tensor({1., 2., 3.}));
/// // This prints:
/// // ```
/// // 2
/// // 4
/// // 6
/// // [ CPUFloatType{3} ]
/// // ```
/// std::cout << v.grad() << std::endl;
/// v.remove_hook(h); // removes the hook
/// @endcode
template <typename T>
hook_return_void_t<T> register_hook(T&& hook) const;
template <typename T>
hook_return_var_t<T> register_hook(T&& hook) const;
// Variable methods
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tensor data() const {
return TensorBase::data();
}
void _backward(TensorList inputs, const std::optional<Tensor>& gradient, std::optional<bool> keep_graph, bool create_graph) const;
const Tensor& requires_grad_(bool _requires_grad=true) const {
TensorBase::requires_grad_(_requires_grad);
return *this;
}
};
namespace detail {
// Helper for creating a Tensor without requiring the user to pass in an
// intrusive_ptr; instead, it converts the arguments into the requested
// intrusive_ptr type and constructs the Tensor from it.
template <typename T, typename... Args>
Tensor make_tensor(Args&&... args) {
return Tensor(c10::make_intrusive<T>(std::forward<Args>(args)...));
}
} // namespace detail
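// Editor's usage sketch for detail::make_tensor (`MyTensorImpl`, `storage`,
// `key_set`, and `dtype` are hypothetical):
//   at::Tensor t = at::detail::make_tensor<MyTensorImpl>(std::move(storage), key_set, dtype);
// which is shorthand for
//   at::Tensor(c10::make_intrusive<MyTensorImpl>(std::move(storage), key_set, dtype));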
} // namespace at
namespace at {
${tensor_method_definitions}
} // namespace at
namespace c10 {
template <>
struct MaybeOwnedTraits<at::Tensor> {
using owned_type = at::Tensor;
using borrow_type = at::Tensor;
static borrow_type createBorrow(const owned_type& from) {
// NOTE: this can be implemented without the special
// unsafe_borrow_t Tensor constructor as
//
// return borrow_type(c10::intrusive_ptr<at::TensorImpl, at::UndefinedTensorImpl>::reclaim(from.unsafeGetTensorImpl()));
//
// but that hurts inlining due to the nullptr check in the
// Tensor(c10::intrusive_ptr<...>) constructor. We already know
// that from.impl_ isn't null because from is a valid Tensor, so
// we needn't do the check again. (using __builtin_assume can
// avoid this, but wouldn't be portable to MSVC.)
return borrow_type(borrow_type::unsafe_borrow_t{}, from);
}
static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) {
lhs.unsafeReleaseTensorImpl();
// See above note: this can be implemented with public API
// similarly to createBorrow(), but that would hurt inlining.
lhs = borrow_type(borrow_type::unsafe_borrow_t{}, rhs);
}
static void destroyBorrow(borrow_type& toDestroy) {
toDestroy.unsafeReleaseTensorImpl(); // "leak" it, but it was already +0.
}
static const owned_type& referenceFromBorrow(const borrow_type& borrow) {
return borrow;
}
static const owned_type* pointerFromBorrow(const borrow_type& borrow) {
return &borrow;
}
static bool debugBorrowIsValid(const borrow_type& /*borrow*/) {
return true;
}
};
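// Editor's usage sketch for the traits above (not part of the template):
// borrowing lets a caller read through a Tensor without any refcount traffic.
inline int64_t maybe_owned_numel_sketch(const at::Tensor& t) {
c10::MaybeOwned<at::Tensor> b = c10::MaybeOwned<at::Tensor>::borrowed(t);
return b->numel();  // destroyBorrow() releases the +0 borrow at scope exit
}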
template <>
struct ExclusivelyOwnedTraits<at::Tensor> {
using repr_type = at::Tensor;
using pointer_type = at::Tensor*;
using const_pointer_type = const at::Tensor*;
static repr_type nullRepr() {
return at::Tensor();
}
template <class... Args>
static repr_type createInPlace(Args&&... args) {
return at::Tensor(std::forward<Args>(args)...);
}
static repr_type moveToRepr(at::Tensor&& x) {
return std::move(x);
}
static void destroyOwned(at::Tensor& x) {
return ExclusivelyOwnedTraits<at::TensorBase>::destroyOwned(x);
}
static at::Tensor take(at::Tensor& x) {
return std::move(x);
}
static pointer_type getImpl(repr_type& x) {
return &x;
}
static const_pointer_type getImpl(const repr_type& x) {
return &x;
}
};
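// Editor's usage sketch (assumes <c10/util/ExclusivelyOwned.h> is in scope):
// ExclusivelyOwned records that it holds the only reference, so its destructor
// can skip the refcount decrement that destroying a plain Tensor would pay.
inline at::Tensor exclusively_owned_sketch(at::Tensor&& t) {
c10::ExclusivelyOwned<at::Tensor> owned(std::move(t));
return std::move(owned).take();  // move the Tensor back out of the wrapper
}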
} // namespace c10
namespace at {
inline c10::MaybeOwned<Tensor> borrow_from_optional_tensor(
const std::optional<Tensor>& opt) {
return opt.has_value()
? c10::MaybeOwned<Tensor>::borrowed(*opt)
: c10::MaybeOwned<Tensor>::owned(std::in_place);
}
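// Editor's sketch of the intended call pattern (the helper name is
// hypothetical): an absent optional materializes an owned undefined Tensor,
// while a present one is borrowed without touching its refcount.
inline bool optional_weight_defined_sketch(const std::optional<Tensor>& weight_opt) {
c10::MaybeOwned<Tensor> weight = borrow_from_optional_tensor(weight_opt);
return weight->defined();
}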
inline c10::MaybeOwned<Tensor> Tensor::expect_contiguous(MemoryFormat memory_format) const & {
if (is_contiguous(memory_format)) {
return c10::MaybeOwned<Tensor>::borrowed(*this);
} else {
return c10::MaybeOwned<Tensor>::owned(__dispatch_contiguous(memory_format));
}
}
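// Editor's usage sketch: the already-contiguous case costs only a borrow; an
// owned contiguous copy is materialized otherwise.
inline int64_t contiguous_numel_sketch(const Tensor& t) {
c10::MaybeOwned<Tensor> c = t.expect_contiguous();
return c->numel();
}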
} // namespace at

View File

@ -0,0 +1,61 @@
#include <c10/core/Scalar.h>
#include <ATen/core/TensorBody.h>
#include <c10/util/string_view.h>
namespace at {
namespace {
// Verifies the requested type is the same as the Tensor's type.
void check_type(const TensorBase& tensor, ScalarType type, c10::string_view type_name) {
TORCH_CHECK(
tensor.scalar_type() == type
|| (isQIntType(tensor.scalar_type())
&& toUnderlying(tensor.scalar_type()) == type),
"expected scalar type ", type_name, " but found ", tensor.scalar_type());
}
} // namespace
#define DEFINE_CAST(T, name) \
template <> \
TORCH_API const T* TensorBase::const_data_ptr() const { \
check_type(*this, ScalarType::name, #name); \
return this->unsafeGetTensorImpl()->data_ptr_impl<T>(); \
} \
\
template <> \
TORCH_API const T* TensorBase::const_data_ptr<const T>() const { \
check_type(*this, ScalarType::name, #name); \
return this->unsafeGetTensorImpl()->data_ptr_impl<std::remove_const_t<T>>(); \
} \
\
template <> \
TORCH_API T* TensorBase::mutable_data_ptr() const { \
check_type(*this, ScalarType::name, #name); \
return this->unsafeGetTensorImpl()->mutable_data_ptr_impl<T>(); \
} \
\
template <> \
TORCH_API T* TensorBase::data_ptr() const { \
return mutable_data_ptr<T>(); \
} \
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_CAST)
AT_FORALL_QINT_TYPES(DEFINE_CAST)
DEFINE_CAST(uint16_t, UInt16)
DEFINE_CAST(uint32_t, UInt32)
DEFINE_CAST(uint64_t, UInt64)
#undef DEFINE_CAST
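// For orientation (editor's note), DEFINE_CAST(float, Float) expanded to
// specializations along the lines of:
//   template <>
//   TORCH_API float* TensorBase::mutable_data_ptr() const {
//     check_type(*this, ScalarType::Float, "Float");
//     return this->unsafeGetTensorImpl()->mutable_data_ptr_impl<float>();
//   }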
#define DEFINE_ITEM(T, name) \
template <> \
TORCH_API T Tensor::item() const { \
return item().to##name(); \
}
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_ITEM)
#undef DEFINE_ITEM
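// Editor's note: these specializations back the typed accessor, e.g.
//   float v = t.item<float>();  // forwards to item().toFloat()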
} //namespace at

View File

@ -0,0 +1,19 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/native/DispatchStub.h>
#include <ATen/TensorIterator.h>
#include <ATen/TensorMeta.h>
namespace at {
// NB: this is explicitly copied here (via codegen) rather than
// included via NativeFunctions.h to avoid recompiling this file when
// NativeFunctions.h changes
namespace meta {
${meta_declaration}
}
namespace native {
${native_declaration}
${native_definitions}
}} // namespace at::native

View File

@ -0,0 +1,14 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/native/ufunc/${name}.h>
#include <ATen/native/DispatchStub.h>
#include <ATen/TensorIterator.h>
#include <ATen/native/cpu/Loops.h>
#include <ATen/cpu/vec/vec.h>
#include <ATen/Dispatch.h>
#include <c10/core/Scalar.h>
namespace at {
namespace native {
${native_definitions}
}} // namespace at::native

View File

@ -0,0 +1,21 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/native/ufunc/${name}.h>
#include <ATen/Dispatch.h>
#include <ATen/native/DispatchStub.h>
#include <c10/core/Scalar.h>
${cuda_headers}
namespace at {
// NB: this is explicitly copied here (via codegen) rather than
// included via NativeFunctions.h to avoid recompiling this file when
// NativeFunctions.h changes
namespace meta {
${meta_declaration}
}
namespace native {
${native_declaration}
${native_definitions}
}} // namespace at::native

View File

@ -0,0 +1,35 @@
#include <ATen/UnboxingFunctions.h>
#include <ATen/Functions.h>
#include <ATen/Tensor.h>
#include <ATen/core/functional.h>
#include <ATen/core/interned_strings.h>
#include <ATen/core/ivalue.h>
#include <ATen/core/stack.h>
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstring>
#include <sstream>
#include <stdexcept>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
namespace at {
namespace unboxing {
using ::c10::fmap;
using ::c10::filter;
using torch::jit::peek;
using torch::jit::drop;
using torch::jit::pack;
using torch::jit::pop;
// Generated function declaration
${definitions}
} // namespace unboxing
} // namespace at

View File

@ -0,0 +1,32 @@
// ${generated_comment}
// Generated by tools/jit/gen_unboxing.py. This file declares code-generated
// boxed C++ functions for operators, based on native_functions.yaml (or a
// similar yaml file with the same syntax). The definition of such a boxed
// function pops IValues off the stack and converts them into the correct C++
// types based on the given schema. This unboxing logic is an alternative to
// template-based metaprogramming unboxing.
#pragma once
#include <ATen/ATen.h>
namespace at {
namespace unboxing {
namespace {
template<typename T, size_t N>
std::array<T, N> as_array(const c10::List<c10::IValue>& list) {
AT_ASSERT(list.size() == N);
std::array<T, N> res;
size_t i = 0;
// Unbox each IValue directly into its slot in the fixed-size array.
for (c10::IValue elem : list) {
res[i++] = elem.to<T>();
}
return res;
}
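// Editor's sketch: a fixed-size schema argument such as `int[2]` arrives as a
// c10::List<c10::IValue>; the generated unboxing code converts it with, e.g.,
//   auto size = as_array<int64_t, 2>(stack_value.toList());
// (`stack_value` is a hypothetical IValue popped from the stack.)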
} // namespace <anonymous>
using Stack = std::vector<c10::IValue>;
// Generated function declaration
${declarations}
} // namespace unboxing
} // namespace at

View File

@ -0,0 +1,22 @@
#pragma once
// ${generated_comment}
#if defined(TORCH_ASSERT_NO_OPERATORS) || defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if including <ATen/core/symbol.h> for \
the c10::Symbol class would be sufficient, or if your change would be \
better placed in another file.
#endif
// ATen symbols correspond exactly to operators defined in ATen. Every
// symbol here corresponds exactly to an ATen operation defined in
// native_functions.yaml; attributes are in one-to-one correspondence
// with their ATen name.
#define FORALL_ATEN_BASE_SYMBOLS(_) \
${aten_symbols}
#define FORALL_ATTR_BASE_SYMBOLS(_) \
${attr_symbols}
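// Editor's sketch of typical consumption of these X-macros (the consumer macro
// is hypothetical): each generated entry expands `_(ns, s)`, so a consumer can
// stamp out one enumerator per symbol:
//   #define DEFINE_KEY(ns, s) ns##_##s,
//   enum class AtenSymbolKey { FORALL_ATEN_BASE_SYMBOLS(DEFINE_KEY) };
//   #undef DEFINE_KEY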

View File

@ -0,0 +1,10 @@
#pragma once
// ${generated_comment}
namespace at {
// Enum of valid tags obtained from the entries in tags.yaml
enum class Tag {
${enum_of_valid_tags}
};
}