I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

File diff suppressed because it is too large

@@ -0,0 +1,74 @@
# This yaml file contains all the possible tags that can be defined in `tags` in `native_functions.yaml`
- tag: inplace_view
desc: |
This tag indicates if an operator *only* modifies the tensor metadata
- tag: pt2_compliant_tag
desc: |
This tag indicates if the operator is guaranteed to
work with the PT2 compilation APIs (torch.compile,
torch.export, etc). If you add this tag to an
operator, please use
`torch.testing._internal.optest.opcheck` to test that
the operator has been registered correctly and
works with torch.compile
- tag: view_copy
desc: |
This tag indicates operators that are *_copy* variants
of view/aliasing operators. If an operator has a view_copy tag,
then it should have the name {op}_copy, where {op} is a view operator.
- tag: dynamic_output_shape
desc: |
This tag indicates if an operator's output's shape depends on input Tensor
data.
- tag: data_dependent_output
desc: |
Operator has a non-Tensor output whose value is dependent on the data
of Tensor inputs. Among other things, this implies that this operator
cannot be run with meta tensor (since data is not available), nor
can it be symbolically traced.
- tag: generated
desc: |
This tag indicates that the operator doesn't have an explicit entry in
native_functions.yaml, and instead was generated automatically by the codegen.
- tag: nondeterministic_seeded
desc: |
This tag indicates if an operator is nondeterministically seeded
(i.e., is random) such that the operator intentionally produces
different results when run twice on the same inputs, but this randomness
is controlled by a Generator which, if reseeded, would give you the
same result.
- tag: nondeterministic_bitwise
desc: |
This tag indicates if an operator doesn't guarantee bitwise equivalence
across different runs of an operator with identical inputs.
- tag: needs_fixed_stride_order
desc: |
This tag indicates that the operator should be passed Tensors following
the same stride permutation as observed in eager when compiled in inductor.
Only one of {needs_fixed_stride_order, flexible_layout} can apply; if
multiple are assigned then we assume the most restrictive one.
- tag: flexible_layout
desc: |
This tag indicates that the custom operator can accept inputs with varying
strides/storage_offset and that when compiled, Inductor is allowed to change
the strides/storage_offset of inputs to the custom operator.
Only one of {needs_fixed_stride_order, flexible_layout} can apply; if
multiple are assigned then we assume the most restrictive one.
# NOTE [Core ATen Ops]
- tag: core
desc: |
Core aten ops are the subset of aten ops that remains after the aten-to-aten decomposition and
functionalization passes. Core aten ops are fully functional and adhere to single static
assignment (SSA): this implies there will be no `inplace` or `_out` variants in this opset.
This opset is designed to serve as the functional IR to interface with compiler backends.
In contrast to primTorch, the core aten opset doesn't decompose ops into explicit
type promotion and broadcasting ops.
The core aten opset is also effectively the opset produced by torchdynamo.export(aten_graph=True),
and thus can be used as an opset for export purposes.
- tag: pointwise
desc: |
Pointwise operators are operators where each element of the output is computed only by accessing
the corresponding element of all the broadcasted inputs. The output shape will be the broadcasted
shape of the inputs.
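
The tags above are also exposed to C++ as the generated `at::Tag` enum, and operators registered outside of `native_functions.yaml` can attach them at definition time. A minimal sketch, assuming the tag-accepting `m.def(schema, {tags})` overload in `torch/library.h` and a hypothetical `mylib::my_sin` operator:

#include <ATen/ATen.h>
#include <torch/library.h>

// Hypothetical custom op used only for illustration.
at::Tensor my_sin(const at::Tensor& x) {
  return x.sin();
}

TORCH_LIBRARY(mylib, m) {
  // Declare the schema and mark the op as safe for the PT2 compile/export stack
  // (see the pt2_compliant_tag description above).
  m.def("my_sin(Tensor x) -> Tensor", {at::Tag::pt2_compliant_tag});
}

TORCH_LIBRARY_IMPL(mylib, CompositeExplicitAutograd, m) {
  m.impl("my_sin", my_sin);
}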

@@ -0,0 +1,36 @@
#include <ATen/core/ATenOpList.h>
#include <string>
#include <cstring>
#include <utility>
#include <unordered_set>
#include <ATen/core/operator_name.h>
// ${generated_comment}
namespace at {
namespace {
struct OpNameEquals final {
bool operator()(const std::pair<const char*, const char*>& lhs, const std::pair<const char*, const char*>& rhs) const {
return 0 == strcmp(lhs.first, rhs.first) && 0 == strcmp(lhs.second, rhs.second);
}
};
struct OpNameHash final {
size_t operator()(const std::pair<const char*, const char*>& p) const {
// use std::hash<std::string> because std::hash<const char*> would hash pointers and not pointed-to strings
return std::hash<std::string>()(p.first) ^ (~ std::hash<std::string>()(p.second));
}
};
}
bool is_custom_op(const c10::OperatorName& opName) {
static std::unordered_set<std::pair<const char*, const char*>, OpNameHash, OpNameEquals> ops {
${aten_ops}
{"", ""}
};
return ops.count(std::make_pair(
opName.name.c_str(), opName.overload_name.c_str())) == 0;
}
}
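
A usage sketch (not part of the template): `is_custom_op` reports whether an operator name is absent from the generated ATen list, so core ATen ops return false and out-of-tree ops return true.

#include <ATen/core/ATenOpList.h>
#include <ATen/core/operator_name.h>

bool example() {
  c10::OperatorName aten_add{"aten::add", "Tensor"};  // a core ATen op -> not custom
  c10::OperatorName custom{"mylib::my_op", ""};       // hypothetical out-of-tree op -> custom
  return !at::is_custom_op(aten_add) && at::is_custom_op(custom);  // expected: true
}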

@@ -0,0 +1,73 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include <ATen/InferSize.h>
#include <ATen/Tensor.h>
#include <ATen/native/Resize.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
#include <ATen/ops/clone.h>
$ops_headers
#endif
namespace at {
namespace native {
// This file contains a number of kernels for aten functions that are fully code-generated.
// TODO: rename this file to something more generic.
namespace {
at::Tensor clone_arg(const at::Tensor& t) {
return t.clone();
}
std::vector<at::Tensor> clone_arg(const at::TensorList& t_list) {
std::vector<at::Tensor> out(t_list.size());
for (const auto& i : c10::irange(t_list.size())) {
out[i] = t_list[i].clone();
}
return out;
}
// duped with gen_resize_out_helper from structured kernels
void copy_arg(const at::Tensor& dst, const at::Tensor& src) {
TORCH_CHECK(src.dtype() == dst.dtype(),
"Expected out tensor to have dtype ", src.dtype(), ", but got ", dst.dtype(), " instead");
TORCH_CHECK(src.device() == dst.device(),
"Expected out tensor to have device ", src.device(), ", but got ", dst.device(), " instead");
dst.copy_(src);
}
void copy_arg(const at::TensorList& dst, const at::TensorList& src) {
TORCH_INTERNAL_ASSERT(dst.size() == src.size());
for (const auto& i : c10::irange(dst.size())) {
copy_arg(dst[i], src[i]);
}
}
// TODO: this doesn't handle restriding empty tensors correctly; see
// gen_resize_out_helper for the correct algorithm
void resize_out_helper(const at::Tensor& dst, const at::Tensor& src) {
at::native::resize_output(dst, src.sizes());
}
void resize_out_helper(const at::TensorList& dst, const at::TensorList& src) {
TORCH_INTERNAL_ASSERT(dst.size() == src.size());
for (const auto& i : c10::irange(dst.size())) {
at::native::resize_output(dst[i], src[i].sizes());
}
}
}
${CompositeViewCopyKernel_Definitions}
${GeneratedCompositeFunctional_Definitions}
${GeneratedCompositeOut_Definitions}
} // namespace native
} // namespace at

@@ -0,0 +1,23 @@
#pragma once
// ${generated_comment}
// NB: The implementing C++ file is RegisterDispatchKey.cpp
// The only #includes we need are for custom classes that have defaults in the C++ API
#include <c10/core/MemoryFormat.h>
#include <c10/core/Scalar.h>
#include <ATen/core/Reduction.h>
// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>
namespace at {
namespace ${dispatch_namespace} {
${dispatch_namespaced_declarations}
} // namespace ${dispatch_namespace}
} // namespace at

@@ -0,0 +1,29 @@
#include <ATen/core/TensorBody.h>
// TODO Undo all logic introduced for Note [Avoiding Include Cycles In Static Dispatch]
// Code introduced to avoid cyclic dependency in static dispatch is no longer
// needed as static dispatch logic is moved from TensorBody.h, which caused cycles in the first place,
// to Operators.cpp for supporting multiple backends with multiple kernels.
//
// Note [Avoiding Include Cycles In Static Dispatch]
// In order to avoid #include cycles in the static dispatch build, we've carefully split out
// the static function definition files into {DispatchKey}Functions.h and {DispatchKey}Functions_inl.h.
//
// Without this split, the include cycle looks like TensorBody.h -> CPUFunctions.h -> TensorBody.h.
// - TensorBody.h #includes CPUFunctions.h in the static dispatch build, because the tensor methods
// all need to call into the fastpath C++ API defined in CPUFunctions.h. The methods are also all
// directly inlined into TensorBody.h.
// - CPUFunctions.h #includes TensorBody.h because it contains function declarations for the entire C++ API,
// which include functions that have defaultable std::optional<Tensor> arguments.
// That requires knowing the full Tensor class definition.
//
// We break the cycle by doing the following:
// - Split out CPUFunctions.h into two files: CPUFunctions.h and CPUFunctions_inl.h
// - CPUFunctions.h is a dummy file that just includes the Tensor class and includes CPUFunctions_inl.h,
// - CPUFunctions_inl.h includes everything else
// - (only in the static dispatch build) TensorBody.h makes sure to finish defining the Tensor class,
// and then it includes CPUFunctions_inl.h.
// - All other files that want the cpu fastpath functions can include CPUFunctions.h directly.
// - This also means that in the static dispatch build, CPUFunctions.h only needs to
// #include TensorBody.h, and it will automatically bring in CPUFunctions_inl.h.
${inline_headers}

@@ -0,0 +1,22 @@
#pragma once
// ${generated_comment}
// NB: The implementing C++ file is RegisterDispatchKey.cpp
// The only #includes we need are for custom classes that have defaults in the C++ API
#include <c10/core/MemoryFormat.h>
#include <c10/core/Scalar.h>
#include <ATen/core/Reduction.h>
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from \
<ATen/ops/{my_operator}_${dispatch_namespace}_dispatch.h>. \
See NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
${DispatchKeyFunctions_inl_includes}
${dispatch_namespaced_declarations}

@@ -0,0 +1,13 @@
// ${generated_comment}
${includes}
${native_functions_include}
namespace {
${helper_fns}
} // namespace
${namespace_prologue}
${native_function_definitions}
${namespace_epilogue}

@@ -0,0 +1,19 @@
#pragma once
// An external backend might generate files within its code tree
// and check all the source files within the tree with clang-format,
// so disable clang-format here since the backend might have a different config.
// clang-format off
// ${generated_comment}
#include <ATen/Tensor.h>
${namespace_prologue}
struct ${class_name} {
${dispatch_declarations}
};
${namespace_epilogue}

@@ -0,0 +1,26 @@
#pragma once
// ${generated_comment}
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>
#include <ATen/TensorUtils.h>
#include <ATen/TracerMode.h>
#include <ATen/core/Generator.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
${static_dispatch_ops_headers}
${operator_includes}
namespace at {
${function_definitions}
}

@@ -0,0 +1,33 @@
#pragma once
// ${generated_comment}
#include <ATen/Tensor.h>
namespace at {
namespace functionalization {
enum class InverseReturnMode {
/// Specifies that functional inverses should always return a view.
AlwaysView,
/// Specifies that functional inverses should always return a non-view / copy.
NeverView,
/// Specifies that functional inverses should return a view unless a (copying) scatter
/// inverse exists, in which case that will be used instead.
/// This avoids as_strided() calls that can be difficult for subclasses to handle.
ViewOrScatterInverse,
};
struct FunctionalInverses {
${view_inverse_declarations}
// NB: These are not generated! They're manually implemented in the template.
// TODO: Change codegen to generate these. See the following link:
// https://github.com/pytorch/pytorch/blob/main/torchgen/model.py#L2583-L2585
static at::Tensor chunk_inverse(const at::Tensor & base, const at::Tensor & mutated_view, InverseReturnMode inverse_return_mode, int64_t mutated_view_idx, int chunks, int dim);
static at::Tensor narrow_inverse(const at::Tensor & base, const at::Tensor & mutated_view, InverseReturnMode inverse_return_mode, int dim, c10::SymInt start, c10::SymInt length);
};
}
}

@@ -0,0 +1,103 @@
#include <array>
#include <ATen/Functions.h>
#include <ATen/Utils.h>
#include <c10/core/Allocator.h>
namespace at {
Tensor TensorMaker::make_tensor() {
AutoDispatchBelowADInplaceOrView guard{}; // TODO: Remove.
tracer::impl::NoTracerDispatchMode tracer_guard{};
check_size_nonnegative(sizes_);
TORCH_CHECK_VALUE(
!deleter_ || !ctx_,
"The deleter and context arguments are mutually exclusive.");
if (device_ == std::nullopt) {
device_ = globalContext().getDeviceFromPtr(data_, opts_.device().type());
}
if (opts_.device().has_index()) {
// clang-format off
TORCH_CHECK_VALUE(
opts_.device() == *device_,
"Specified device ", opts_.device(), " does not match device of data ", *device_);
// clang-format on
}
std::size_t size_bytes = computeStorageSize();
DataPtr data_ptr{};
if (deleter_) {
data_ptr = makeDataPtrFromDeleter();
} else {
data_ptr = makeDataPtrFromContext();
}
TORCH_CHECK(!resizeable_ || allocator_ != nullptr, "Must specify an allocator with allocator() if you want to use resizeable_storage()");
Storage storage{Storage::use_byte_size_t{}, size_bytes, std::move(data_ptr), /*allocator=*/allocator_, /*resizable=*/resizeable_};
Tensor tensor = detail::make_tensor<TensorImpl>(
std::move(storage), opts_.computeDispatchKey(), opts_.dtype());
TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl();
if (strides_) {
tensor_impl->set_sizes_and_strides(sizes_, *strides_);
} else {
tensor_impl->set_sizes_contiguous(sizes_);
}
if (storage_offset_) {
tensor_impl->set_storage_offset(*storage_offset_);
}
return tensor;
}
std::size_t TensorMaker::computeStorageSize() const noexcept {
std::size_t itemsize = opts_.dtype().itemsize();
if (strides_) {
auto storage_size = detail::computeStorageNbytes(sizes_, *strides_, itemsize);
if (storage_offset_) {
storage_size += storage_offset_.value();
}
return storage_size;
}
std::size_t size = 1;
for (std::int64_t s : sizes_) {
size *= static_cast<std::size_t>(s);
}
auto storage_size = size * itemsize;
if (storage_offset_) {
storage_size += storage_offset_.value();
}
return storage_size;
}
inline DataPtr TensorMaker::makeDataPtrFromDeleter() noexcept {
return InefficientStdFunctionContext::makeDataPtr(data_, std::move(deleter_), *device_);
}
inline DataPtr TensorMaker::makeDataPtrFromContext() noexcept {
return DataPtr{data_, ctx_.release(), ctx_.get_deleter(), *device_};
}
IntArrayRef TensorMaker::makeTempSizes() const noexcept {
static std::int64_t zeros[5] = {0, 0, 0, 0, 0};
if (opts_.has_memory_format()) {
MemoryFormat format = *opts_.memory_format_opt();
if (format == MemoryFormat::ChannelsLast) {
return IntArrayRef(zeros, 4);
}
if (format == MemoryFormat::ChannelsLast3d) {
return IntArrayRef(zeros, 5);
}
}
return IntArrayRef(zeros, 1);
}
} // namespace at
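
`TensorMaker` is the machinery behind `at::from_blob` and the `at::for_blob` builder; a usage sketch (the builder methods shown are assumed from the public TensorMaker interface):

#include <ATen/ATen.h>
#include <vector>

at::Tensor wrap_external_buffer() {
  auto* data = new std::vector<float>{1, 2, 3, 4, 5, 6};
  // for_blob builder: wrap the buffer without copying and hand ownership to the
  // tensor via a deleter that frees the vector when the storage is released.
  return at::for_blob(data->data(), {2, 3})
      .options(at::TensorOptions().dtype(at::kFloat))
      .deleter([data](void*) { delete data; })
      .make_tensor();
}

// The simpler non-owning form: the caller keeps the buffer alive.
at::Tensor wrap_non_owning(float* buf) {
  return at::from_blob(buf, {2, 3}, at::TensorOptions().dtype(at::kFloat));
}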

@@ -0,0 +1,143 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}.h> and \
see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
// NOTE: [TORCH_ASSERT_ONLY_METHOD_OPERATORS]
//
// In ATen, certain generated headers files include the definitions of
// every single operator in PyTorch. Unfortunately this means every
// time an operator signature is updated or changed in
// native_functions.yaml, you (and every other PyTorch developer) need
// to recompile every source file that includes any of these headers.
//
// To break up these header dependencies and improve incremental
// build times for all PyTorch developers, these headers are split
// into per-operator headers in the `ATen/ops` folder. This limits
// incremental builds to only changes to methods of `Tensor`, or files
// that use the specific operator being changed. With `at::sum` as an
// example, you should include
//
// <ATen/ops/sum.h> // instead of ATen/Functions.h
// <ATen/ops/sum_native.h> // instead of ATen/NativeFunctions.h
// <ATen/ops/sum_ops.h> // instead of ATen/Operators.h
// <ATen/ops/sum_cpu_dispatch.h> // instead of ATen/CPUFunctions.h
//
// However, even if you're careful to use this in your own code,
// `Functions.h` might be included indirectly through another header
// without you realising it. To avoid this, you can add
//
// #define TORCH_ASSERT_ONLY_METHOD_OPERATORS
//
// to the top of your source file. This way any time the non-specific
// headers are included, the compiler will error out.
//
// Also, be aware that `ops` are not available in all build
// configurations (namely fb-internal) so you must guard these
// includes with `#ifdef AT_PER_OPERATOR_HEADERS`. e.g.
//
// #ifndef AT_PER_OPERATOR_HEADERS
// #include <ATen/Functions.h>
// #else
// #include <ATen/ops/sum.h>
// #endif
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>
#include <ATen/TensorUtils.h>
#include <ATen/TracerMode.h>
#include <ATen/core/Generator.h>
#include <ATen/core/Reduction.h>
#include <c10/core/SymInt.h>
#include <ATen/core/Tensor.h>
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/util/OptionalArrayRef.h>
#include <ATen/ops/from_blob.h>
#include <ATen/ops/tensor.h>
${Functions_includes}
namespace at {
${Functions_declarations}
// Special C++ only overloads for std()-like functions (See gh-40287)
// These are needed because int -> bool conversion takes precedence over int -> IntArrayRef
// So, for example std(0) would select the std(unbiased=False) overload
TORCH_API inline Tensor var(const Tensor& self, int dim) {
return at::var(self, IntArrayRef{dim});
}
TORCH_API inline std::tuple<Tensor, Tensor> var_mean(const Tensor& self, int dim) {
return at::var_mean(self, IntArrayRef{dim});
}
TORCH_API inline Tensor std(const Tensor& self, int dim) {
return at::std(self, IntArrayRef{dim});
}
TORCH_API inline std::tuple<Tensor, Tensor> std_mean(const Tensor& self, int dim) {
return at::std_mean(self, IntArrayRef{dim});
}
inline int64_t numel(const Tensor& tensor) {
return tensor.numel();
}
inline int64_t size(const Tensor& tensor, int64_t dim) {
return tensor.size(dim);
}
inline int64_t stride(const Tensor& tensor, int64_t dim) {
return tensor.stride(dim);
}
inline bool is_complex(const Tensor& tensor) {
return tensor.is_complex();
}
inline bool is_floating_point(const Tensor& tensor) {
return tensor.is_floating_point();
}
inline bool is_signed(const Tensor& tensor) {
return tensor.is_signed();
}
inline bool is_inference(const Tensor& tensor) {
return tensor.is_inference();
}
inline bool _is_zerotensor(const Tensor& tensor) {
return tensor._is_zerotensor();
}
inline bool is_conj(const Tensor& tensor) {
return tensor.is_conj();
}
inline Tensor conj(const Tensor& tensor) {
return tensor.conj();
}
inline bool is_neg(const Tensor& tensor) {
return tensor.is_neg();
}
}

@@ -0,0 +1,19 @@
#pragma once
// This file contains autogenerated LazyTensor IR nodes
${lazy_ir_sysinc}
${lazy_ir_inc}
${namespace_prologue}
using at::operator<<;
// kNullValue is used to contribute a static hash value any time
// a node has an Optional<Value> input that is nullopt. It is important
// to differentiate between HASH(std::nullopt, something) and HASH(something, std::nullopt),
// and using kNullValue in the hash function in the order of arguments
// serves this purpose.
static const torch::lazy::Value kNullValue = torch::lazy::Value();
${ir_declarations}
${namespace_epilogue}

@@ -0,0 +1,11 @@
#pragma once
${lazy_non_native_ir_inc}
// This file contains autogenerated LazyTensor Non Native IR nodes
${namespace_prologue}
${non_native_ir_nodes}
${namespace_epilogue}

@@ -0,0 +1,24 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>
${MethodOperators_includes}
namespace at {
namespace _ops {
${MethodOperators_declarations}
} // namespace _ops
} // namespace at

@@ -0,0 +1,17 @@
#pragma once
// ${generated_comment}
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <tuple>
#include <vector>
${extra_includes}
${native_function_declarations}

@@ -0,0 +1,33 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}_native.h> \
and see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <tuple>
#include <vector>
${NativeFunctions_includes}
${NativeFunctions_declarations}

@@ -0,0 +1,23 @@
#pragma once
// ${generated_comment}
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <optional>
#include <c10/core/QScheme.h>
#include <ATen/core/Reduction.h>
#include <ATen/TensorIterator.h>
#include <ATen/TensorMeta.h>
#include <tuple>
#include <vector>
namespace at {
namespace meta {
${meta_function_declarations}
} // namespace meta
} // namespace at

@@ -0,0 +1,19 @@
#pragma once
// ${generated_comment}
#include <ATen/core/Tensor.h>
#include <ATen/core/IListRef.h>
#include <ATen/TensorMeta.h>
#include <ATen/TensorIterator.h>
${NativeMetaFunctions_includes}
namespace at {
namespace meta {
${NativeMetaFunctions_declarations}
} // namespace meta
} // namespace at

@@ -0,0 +1,18 @@
#pragma once
// ${generated_comment}
#include <tuple>
#include <vector>
// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>
namespace at {
namespace _ops {
${declarations}
}} // namespace at::_ops

@@ -0,0 +1,19 @@
#include <ATen/Tensor.h>
#include <ATen/core/dispatch/Dispatcher.h>
// ${generated_comment}
// NOTE See [Sharded File] comment in VariableType
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
${operator_headers}
#endif
${static_dispatch_extra_headers}
namespace at { namespace _ops {
${definitions}
}} // namespace at::_ops

@@ -0,0 +1,74 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider including a specific operator from <ATen/ops/{my_operator}_ops.h> \
and see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS].
#endif
#include <c10/core/SymInt.h>
#include <c10/core/SymIntArrayRef.h>
#include <c10/core/Scalar.h>
#include <c10/core/TensorOptions.h>
#include <c10/core/QScheme.h>
#include <c10/util/OptionalArrayRef.h>
#include <tuple>
#include <vector>
${Operators_includes}
// Extension writers: do you write wrapper functions? Are you frustrated with
// resolving overloads of operators? Are you frustrated with dealing with
// pointer-to-methods and resolving overloads of pointer-to-methods?? Look no
// further, this is the utility for you.
//
// Given an operator schema: aten::op.overload(...
//
// Use ATEN_FN2(op, overload) to get a *function* version of the operator
// that is guaranteed to not be overloaded. This means that you can safely
// decltype(&ATEN_FN2(op, overload)) it. NB: the 2 means this macro takes 2 args.
//
// Given an operator schema without an overload name: aten::op(...
//
// Use ATEN_FN(op) to get an unambiguous *function* version of the operator.
//
// There is some interesting behavior for out= operations.
// ATEN_FN2(sin, out) gives a function that is *faithful* to the schema;
// that is, the order of arguments is exactly what it looks like in the schema.
#define ATEN_FN2(op_name, overload) at::_ops::op_name##_##overload::call
#define ATEN_FN(op_name) at::_ops::op_name::call
// Separately, ATEN_OP(op) and ATEN_OP2(op, overload) define a class containing compile-time
// metadata about a given aten operator.
// Notable data on the class includes:
// - ATEN_OP2(add, Tensor)::name // returns the string name: "add"
// - ATEN_OP2(add, Tensor)::overload_name // returns the string overload name: "Tensor"
// - ATEN_OP2(add, Tensor)::schema // returns the C++ schema type: at::Tensor (const at::Tensor &, const at::Tensor &, const at::Scalar &)
// - ATEN_OP2(add, Tensor)::schema_str // returns the string jit type: "add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor"
#define ATEN_OP2(op_name, overload) at::_ops::op_name##_##overload
#define ATEN_OP(op_name) at::_ops::op_name
// WARNING: Please do not call any of the ops in the _ops namespace directly.
// Use the ATEN_FN macros. We do not guarantee stability of the naming
// scheme for the functions in at::_ops
// See Note [The ATen Operators API] for details of the at::_ops namespace
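// An added illustration (not part of the generated header) of how the macros
// above are typically used; `sin.out` and `add.Tensor` are example schemas, and
// the exact at::_ops signatures are whatever the codegen emits:
//
//   // A non-overloaded function for the faithful out= schema: (self, out) order.
//   at::Tensor& (*sin_out_fp)(const at::Tensor&, at::Tensor&) = ATEN_FN2(sin, out);
//   using sin_out_t = decltype(&ATEN_FN2(sin, out));
//
//   // Compile-time metadata via ATEN_OP2:
//   using AddTensorOp = ATEN_OP2(add, Tensor);
//   static_assert(std::is_same_v<AddTensorOp::schema,
//       at::Tensor (const at::Tensor&, const at::Tensor&, const at::Scalar&)>);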
namespace at {
namespace _ops {
${Operators_declarations}
} // namespace _ops
} // namespace at

@@ -0,0 +1,15 @@
// ${generated_comment}
#include <ATen/RedispatchFunctions.h>
#include <ATen/Functions.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/core/op_registration/adaption.h>
namespace at {
namespace redispatch {
${function_redispatch_definitions}
} // namespace redispatch
} // namespace at

@@ -0,0 +1,32 @@
#pragma once
// ${generated_comment}
#ifdef TORCH_ASSERT_ONLY_METHOD_OPERATORS
#error This change adds a dependency on all pytorch operators, meaning the \
file will need to be re-compiled every time an operator is changed or added. \
Consider using the at::_ops::{name}::redispatch() interface by including \
the specific operator from <ATen/ops/{my_operator}_ops.h>
#endif
#include <c10/core/Scalar.h>
#include <ATen/Tensor.h>
#include <c10/core/Storage.h>
#include <ATen/core/Generator.h>
#include <c10/util/Deprecated.h>
#include <ATen/DeviceGuard.h>
#include <c10/core/TensorOptions.h>
#include <ATen/core/Reduction.h>
#include <optional>
#include <ATen/TensorUtils.h>
#include <ATen/Context.h>
#include <ATen/TracerMode.h>
#include <ATen/Operators.h>
namespace at {
namespace redispatch {
${function_redispatch_definitions}
} // namespace redispatch
}
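
For reference, a hedged sketch of calling through the redispatch API; the leading c10::DispatchKeySet argument followed by the operator's usual arguments is assumed from the generated pattern:

#include <ATen/RedispatchFunctions.h>

// Skip the dispatcher's normal key extraction and redispatch add with an explicit key set.
at::Tensor add_via_cpu_keyset(const at::Tensor& self, const at::Tensor& other) {
  c10::DispatchKeySet ks(c10::DispatchKey::CPU);
  return at::redispatch::add(ks, self, other, /*alpha=*/1);
}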

@@ -0,0 +1,29 @@
// We register ops with a higher priority dispatch key (BackendSelect) than the usual backend-specific keys (e.g. CPU)
// which makes calls to the factory functions dispatch to here.
// We then 'manually' compute a lower-priority key to re-dispatch to (e.g. CPU) to get to the eventually correct backend.
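// Roughly, each generated wrapper below follows this shape (an added sketch with
// assumed names, not the actual generated code):
//
//   at::Tensor some_factory_wrapper(IntArrayRef size, std::optional<ScalarType> dtype,
//                                   std::optional<Layout> layout, std::optional<Device> device, ...) {
//     // Factory ops have no Tensor inputs to extract a dispatch key from, so the
//     // backend key is computed from dtype/layout/device and then re-dispatched.
//     c10::DispatchKeySet ks(c10::computeDispatchKey(dtype, layout, device));
//     return at::_ops::some_factory_op::redispatch(ks, size, dtype, layout, device, ...);
//   }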
// ${generated_comment}
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/core/dispatch/DispatchKeyExtractor.h>
#include <torch/library.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#else
${ops_headers}
#endif
namespace at {
namespace {
${backend_select_method_definitions}
TORCH_LIBRARY_IMPL(aten, BackendSelect, m) {
${backend_select_function_registrations};
}
} // namespace
} // at

@@ -0,0 +1,41 @@
#include <torch/csrc/jit/runtime/operator.h>
#include <torch/csrc/jit/runtime/custom_operator.h>
#include <torch/csrc/jit/runtime/register_ops_utils.h>
#include <ATen/UnboxingFunctions.h>
// ${generated_comment}
// NOTE [Sharded File]: This file is generated in a sharded fashion to speed up
// incremental rebuilds. See the comment at the top of
// templates/VariableType.cpp for an analogous, in-depth discussion.
//
// Generated by tools/jit/gen_unboxing.py. This file registers all ATen ops into the JIT op registry instead of the
// c10 dispatcher. The JIT op registry only takes boxed kernels, so we call the unboxing functions in UnboxingFunctions.h
// to cast arguments from IValue into C++ types and delegate to the unboxed kernels.
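// For illustration (an added sketch, not the generated code): each registered entry
// is conceptually a boxed wrapper that pops IValues off the JIT stack, converts them
// to C++ types, calls the unboxed ATen function, and pushes the result back, e.g.:
//
//   [](Stack& stack) {
//     auto other = pop(stack).toTensor();  // arguments come off the stack in reverse order
//     auto self = pop(stack).toTensor();
//     push(stack, at::mul(self, other));
//   }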
namespace torch { namespace jit {
using autograd::Variable;
using autograd::variable_list;
using at::Scalar;
using at::ScalarType;
using at::Tensor;
using at::TensorOptions;
using at::DeviceGuard;
using ::c10::fmap;
using ::c10::filter;
namespace {
RegisterOperators reg({
// Generated operators
${unboxed_ops}
});
} // anon namespace
}} // namespace torch::jit

@@ -0,0 +1,24 @@
${ns_prologue}
// NB: TORCH_LIBRARY_IMPL must be in an anonymous namespace to avoid
// ambiguity with conflicting identifiers that may have been defined in
// at namespace already.
namespace {
${dispatch_helpers}
${dispatch_anonymous_definitions}
${static_init_dispatch_registrations}
} // anonymous namespace
${deferred_dispatch_registrations}
namespace ${dispatch_namespace} {
${dispatch_namespaced_definitions}
} // namespace ${dispatch_namespace}
${ns_epilogue}

@@ -0,0 +1,54 @@
// required for old g++ to compile PRId64 macros, see
// https://github.com/pytorch/pytorch/issues/3571
// for context
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
// An external backend might generate files within its code tree
// and check all the source files within the tree with clang-format,
// so disable clang-format here since the backend might have a different config.
// clang-format off
// NOTE: This condition is true for all PyTorch internal libraries; it
// just excludes external projects such as torch_xla which
// re-use some of the PyTorch codegen machinery.
#if defined(CAFFE2_BUILD_MAIN_LIB) || \
defined(TORCH_CUDA_BUILD_MAIN_LIB) || \
defined(TORCH_HIP_BUILD_MAIN_LIB) || \
defined(TORCH_CUDA_CU_BUILD_MAIN_LIB) || \
defined(TORCH_CUDA_CPP_BUILD_MAIN_LIB)
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#endif
// ${generated_comment}
#include <c10/core/TensorImpl.h>
#include <c10/core/Allocator.h>
#include <ATen/DeviceGuard.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/Utils.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/Dispatch.h>
#include <c10/util/ExclusivelyOwned.h>
#include <c10/util/Half.h>
#include <c10/core/UndefinedTensorImpl.h>
#include <optional>
#include <ATen/Tensor.h>
#include <ATen/native/Resize.h>
#include <cstddef>
#include <functional>
#include <memory>
#include <utility>
#include <ATen/Config.h>
#include <ATen/core/op_registration/adaption.h>
#include <torch/library.h>
$extra_cuda_headers
$external_backend_headers
$dispatch_headers
$ops_headers
// See template file RegisterDispatchDefinitions.ini
$dispatch_definitions

@@ -0,0 +1,110 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// ${generated_comment}
#include <ATen/core/LegacyTypeDispatch.h>
#include <ATen/EmptyTensor.h>
#include <ATen/FunctionalTensorWrapper.h>
#include <ATen/FunctionalInverses.h>
#include <ATen/MemoryOverlap.h>
#include <torch/library.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Operators.h>
#include <ATen/NativeFunctions.h>
#else
// needed for the meta tensor calls to get stride info in functionalization
#include <ATen/ops/empty_strided_native.h>
// needed for special handling of copy_().
// See Note [functionalizating copy_() and not preserving strides]
#include <ATen/ops/to_ops.h>
#include <ATen/ops/expand_copy_ops.h>
$ops_headers
#endif
namespace at {
namespace functionalization {
// This keyset is used by functionalization when it calls into meta kernels
// to accurately propagate stride metadata.
// Exclude any modes: the purpose of calling into meta kernels is only as an implementation
// detail to perform shape inference, and we don't want any modal keys to run.
// Specifically, we want to prevent functionalization and Python modes from running.
constexpr auto exclude_keys_for_meta_dispatch =
c10::functorch_transforms_ks |
c10::DispatchKeySet({
c10::DispatchKey::FuncTorchDynamicLayerBackMode,
c10::DispatchKey::FuncTorchDynamicLayerFrontMode,
c10::DispatchKey::Python,
c10::DispatchKey::PreDispatch,
});
// Helper around at::has_internal_overlap.
// The ATen util is used in hot-path eager mode: it's always fast,
// but might return TOO_HARD sometimes.
// During functionalization, we're ok taking a bit longer
// to detect memory overlap.
inline bool has_internal_overlap_helper(const at::Tensor t) {
auto has_overlap = at::has_internal_overlap(t);
if (has_overlap == at::MemOverlap::Yes) return true;
if (has_overlap == at::MemOverlap::No) return false;
return false;
}
inline Tensor to_meta(const Tensor& t) {
if (!t.defined()) return t;
return at::native::empty_strided_meta_symint(t.sym_sizes(), t.sym_strides(),
/*dtype=*/std::make_optional(t.scalar_type()), /*layout=*/std::make_optional(t.layout()),
/*device=*/std::make_optional(c10::Device(kMeta)), /*pin_memory=*/std::nullopt);
}
inline std::optional<Tensor> to_meta(const std::optional<Tensor>& t) {
if (t.has_value()) {
return std::make_optional<Tensor>(to_meta(*t));
}
return std::nullopt;
}
inline std::vector<Tensor> to_meta(at::ITensorListRef t_list) {
std::vector<Tensor> outputs;
outputs.reserve(t_list.size());
for (const auto& tensor : t_list) {
outputs.push_back(to_meta(tensor));
}
return outputs;
}
inline c10::List<Tensor> to_meta(const c10::List<Tensor>& t_list) {
c10::List<Tensor> outputs;
outputs.reserve(t_list.size());
for (const auto i : c10::irange(t_list.size())) {
outputs.push_back(to_meta(t_list[i]));
}
return outputs;
}
inline c10::List<::std::optional<Tensor>> to_meta(const c10::List<::std::optional<Tensor>>& t_list) {
c10::List<::std::optional<Tensor>> outputs;
outputs.reserve(t_list.size());
for (const auto i : c10::irange(t_list.size())) {
outputs.push_back(to_meta(t_list[i]));
}
return outputs;
}
${func_definitions}
} // namespace functionalization
namespace {
TORCH_LIBRARY_IMPL(aten, Functionalize, m) {
${func_registrations};
}
} // namespace
} // namespace at

@@ -0,0 +1,13 @@
// ${generated_comment}
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <torch/library.h>
namespace at {
TORCH_LIBRARY(aten, m) {
${aten_schema_registrations};
// Distributed Ops
// Implementations located in torch/csrc/jit/runtime/register_distributed_ops.cpp
m.def("get_gradients(int context_id) -> Dict(Tensor, Tensor)");
}
${schema_registrations}
} // namespace at

@@ -0,0 +1,4 @@
// This file contains all native_functions that can be registered to
// and the schema string that they should be registered with
${registration_declarations}

@@ -0,0 +1,753 @@
#pragma once
#ifdef TORCH_ASSERT_NO_OPERATORS
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if your change would be better placed in \
another file, or if a more specific header might achieve the same goal. \
See NOTE: [Tensor vs. TensorBase]
#endif
#include <c10/core/Device.h>
#include <c10/core/Layout.h>
#include <c10/core/MemoryFormat.h>
#include <c10/core/QScheme.h>
#include <c10/core/Stream.h>
#include <c10/core/Scalar.h>
#include <c10/core/ScalarType.h>
#include <c10/core/ScalarTypeToTypeMeta.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorImpl.h>
#include <c10/core/UndefinedTensorImpl.h>
#include <c10/core/WrapDimMinimal.h>
#include <c10/util/Exception.h>
#include <c10/util/ExclusivelyOwned.h>
#include <c10/util/Deprecated.h>
#include <c10/util/MaybeOwned.h>
#include <optional>
#include <c10/util/OptionalArrayRef.h>
#include <c10/util/intrusive_ptr.h>
#include <c10/macros/Export.h>
#include <ATen/core/CheckMemoryFormat.h>
#include <ATen/core/DeprecatedTypePropertiesRegistry.h>
#include <ATen/core/DeprecatedTypeProperties.h>
#include <ATen/core/NamedTensor.h>
#include <ATen/core/QuantizerBase.h>
#include <c10/core/SymInt.h>
#include <ATen/core/TensorAccessor.h>
#include <ATen/core/TensorBase.h>
#include <ATen/MethodOperators.h>
namespace c10{
template<class T> class List;
template<class T> class IListRef;
}
namespace at {
struct Generator;
struct Type;
class DeprecatedTypeProperties;
class Tensor;
} // namespace at
namespace at {
namespace indexing {
struct TensorIndex;
} // namespace indexing
} // namespace at
namespace torch { namespace autograd {
struct Node;
}} // namespace torch::autograd
namespace at {
class OptionalTensorRef;
class TensorRef;
class Tensor;
using TensorList = ArrayRef<Tensor>;
using ITensorList = c10::IListRef<Tensor>;
using Stream = c10::Stream;
// Tensor is a "generic" object holding a pointer to the underlying TensorImpl object, which
// has an embedded reference count. In this way, Tensor is similar to boost::intrusive_ptr.
//
// For example:
//
// void func(Tensor a) {
// Tensor b = a;
// ...
// }
//
// In this example, when we say Tensor b = a, we are creating a new object that points to the
// same underlying TensorImpl, and bumps its reference count. When b goes out of scope, the
// destructor decrements the reference count by calling release() on the TensorImpl it points to.
// The existing constructors, operator overloads, etc. take care to implement the correct semantics.
//
// Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and
// special care must be taken to handle this.
class TORCH_API Tensor: public TensorBase {
protected:
// Create a Tensor with a +0 reference count. Special care must be
// taken to avoid decrementing this reference count at destruction
// time. Intended to support MaybeOwnedTraits<Tensor>.
explicit Tensor(unsafe_borrow_t, const TensorBase& rhs): TensorBase(unsafe_borrow_t{}, rhs) {}
friend MaybeOwnedTraits<Tensor>;
friend OptionalTensorRef;
friend TensorRef;
public:
Tensor() = default;
// This constructor should not be used by end users and is an implementation
// detail invoked by autogenerated code.
explicit Tensor(
c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
: TensorBase(std::move(tensor_impl)) {}
Tensor(const Tensor &tensor) = default;
Tensor(Tensor &&tensor) = default;
// Implicitly move-constructible from TensorBase, but must be explicit to increase refcount
explicit Tensor(const TensorBase &base): TensorBase(base) {}
/*implicit*/ Tensor(TensorBase &&base): TensorBase(std::move(base)) {}
// Creates a new wrapper from TensorImpl. Intentionally a free method because
// it should be used with care. Checks necessary invariants
static Tensor wrap_tensor_impl(
c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl) {
return TensorBase::wrap_tensor_impl(std::move(tensor_impl));
}
Tensor contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const {
return TensorBase::contiguous(memory_format);
}
Tensor conj() const {
if (!this->is_complex()) {
return *this;
}
switch (this->layout()) {
case at::kSparse:
case at::kSparseCsr:
case at::kSparseCsc:
case at::kSparseBsr:
case at::kSparseBsc:
return this->conj_physical();
default:
return this->_conj();
}
}
// Aliased by Dimname overloads, so need explicit using
using TensorBase::size;
using TensorBase::sym_size;
using TensorBase::stride;
/// Should be used if *this can reasonably be expected to be contiguous and
/// performance is important.
/// Compared to contiguous, it saves a reference count
/// increment/decrement if *this is already contiguous, at the cost
/// in all cases of an extra pointer of stack usage, an extra branch
/// to access, and an extra branch at destruction time.
c10::MaybeOwned<Tensor> expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const &;
// Use .contiguous() instead. Trying to borrow from a prvalue Tensor
// will only lead to trouble and dangling references.
c10::MaybeOwned<Tensor> expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) && = delete;
// The following overloads are very intriguing. Consider the following
// program:
//
// x[1] = 3;
//
// We would expect that the first entry of x is written to 3. But how can we
// actually achieve this? x[1] evaluates to a tensor...
//
// The answer is, using a ref-qualifier. x[1] is an rvalue, which cannot be
// (profitably) assigned to in the traditional sense, so we overload
// assignment to mean, "Actually, copy 3 into the tensor data." This is done
// with an rvalue-reference ref-qualified overload (the methods with && at the
// end of their type.)
//
// There's one more fly in the ointment: We also want
//
// Tensor x = y;
//
// to work, and we want it NOT to copy. So we need a traditional operator=
// overload. But we MUST specify a mutable lvalue ref-qualifier, to
// disambiguate the traditional overload from the rvalue-reference
// ref-qualified overload. Otherwise, it will be ambiguous, because
// a non ref-qualified method is eligible for all situations.
// Unfortunately, we have to write these constructors out manually
// to work around an MSVC bug:
// error C2580: 'at::Tensor &at::Tensor::operator =(const at::Tensor &) &':
// multiple versions of a defaulted special member functions are not allowed
// Tensor& operator=(const Tensor&) & = default;
// Tensor& operator=(Tensor&&) & = default;
// Also MSVC will wrongly issue the following warning with the aforementioned fix
// warning C4522: 'at::Tensor': multiple assignment operators specified
// Let's just skip the warning.
//
// TODO: temporarily disabled
Tensor& operator=(const TensorBase& x) & {
impl_ = x.getIntrusivePtr();
return *this;
}
Tensor& operator=(TensorBase&& x) & noexcept {
impl_ = x.unsafeReleaseIntrusivePtr();
return *this;
}
Tensor& operator=(const Tensor &x) & {
return operator=(static_cast<const TensorBase&>(x));
}
Tensor& operator=(Tensor &&x) & noexcept {
return operator=(static_cast<TensorBase&&>(x));
}
Tensor& operator=(const Scalar &v) && {
return fill_(v);
}
Tensor& operator=(const Tensor &rhs) && {
return copy_(rhs);
}
Tensor& operator=(Tensor&& rhs) && {
return copy_(rhs);
}
C10_DEPRECATED_MESSAGE("Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device().")
DeprecatedTypeProperties & type() const {
return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
dispatchKeyToBackend(legacyExtractDispatchKey(key_set())),
scalar_type());
}
Tensor toType(ScalarType t) const {
return to(options().dtype(t), /*non_blocking*/ false, /*copy*/ false);
}
// TODO: Deprecate me
Tensor toBackend(Backend b) const {
return to(options().device(backendToDeviceType(b)).layout(layout_from_backend(b)), /*non_blocking*/ false, /*copy*/ false);
}
C10_DEPRECATED_MESSAGE("Tensor.is_variable() is deprecated; everything is a variable now. (If you want to assert that variable has been appropriately handled already, use at::impl::variable_excluded_from_dispatch())")
bool is_variable() const noexcept {
return !at::impl::variable_excluded_from_dispatch();
}
template<typename T>
C10_DEPRECATED_MESSAGE("Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead.")
T * data() const {
return data_ptr<T>();
}
template <typename T>
T item() const;
template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead")
GenericPackedTensorAccessor<T,N,PtrTraits,index_t> packed_accessor() const & {
return generic_packed_accessor<T,N,PtrTraits,index_t>();
}
template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead")
GenericPackedTensorAccessor<T,N,PtrTraits,index_t> packed_accessor() && = delete;
Tensor operator~() const {
return bitwise_not();
}
Tensor operator-() const {
return neg();
}
Tensor& operator+=(const Tensor & other) {
return add_(other);
}
Tensor& operator+=(const Scalar & other) {
return add_(other);
}
Tensor& operator-=(const Tensor & other) {
return sub_(other);
}
Tensor& operator-=(const Scalar & other) {
return sub_(other);
}
Tensor& operator*=(const Tensor & other) {
return mul_(other);
}
Tensor& operator*=(const Scalar & other) {
return mul_(other);
}
Tensor& operator/=(const Tensor & other) {
return div_(other);
}
Tensor& operator/=(const Scalar & other) {
return div_(other);
}
Tensor& operator&=(const Tensor & other) {
return bitwise_and_(other);
}
Tensor& operator|=(const Tensor & other) {
return bitwise_or_(other);
}
Tensor& operator^=(const Tensor & other) {
return bitwise_xor_(other);
}
Tensor operator[](const Scalar & index) const {
if (!index.isIntegral(false)) {
TORCH_CHECK_INDEX(false, "Can only index tensors with integral scalars");
}
return this->operator[](index.toLong());
}
Tensor operator[](const Tensor & index) const {
// These properties are checked in the Scalar constructor, but we already
// check them here to provide more useful diagnostics for the user.
if (!index.defined()) {
TORCH_CHECK_INDEX(false, "Can only index with tensors that are defined");
}
if (index.dim() != 0) {
TORCH_CHECK_INDEX(false,
"Can only index with tensors that are scalars (zero-dim)");
}
// The Scalar(Tensor) constructor is explicit, so we need to call it.
return this->operator[](index.item());
}
Tensor operator[](int64_t index) const {
return select(0, index);
}
Tensor index(ArrayRef<at::indexing::TensorIndex> indices) const;
Tensor index(std::initializer_list<at::indexing::TensorIndex> indices) const;
Tensor & index_put_(ArrayRef<at::indexing::TensorIndex> indices, Tensor const & rhs);
Tensor & index_put_(ArrayRef<at::indexing::TensorIndex> indices, const Scalar& v);
Tensor & index_put_(std::initializer_list<at::indexing::TensorIndex> indices, Tensor const & rhs);
Tensor & index_put_(std::initializer_list<at::indexing::TensorIndex> indices, const Scalar& v);
Tensor cpu() const {
return to(options().device(c10::DeviceType::CPU), /*non_blocking*/ false, /*copy*/ false);
}
// TODO: The Python version also accepts arguments
Tensor cuda() const {
return to(options().device(c10::DeviceType::CUDA), /*non_blocking*/ false, /*copy*/ false);
}
Tensor hip() const {
return to(options().device(c10::DeviceType::HIP), /*non_blocking*/ false, /*copy*/ false);
}
Tensor ve() const {
return to(options().device(c10::DeviceType::VE), /*non_blocking*/ false, /*copy*/ false);
}
Tensor vulkan() const {
return to(options().device(c10::DeviceType::Vulkan), /*non_blocking*/ false, /*copy*/ false);
}
Tensor metal() const {
return to(options().device(c10::DeviceType::Metal), /*non_blocking*/ false, /*copy*/ false);
}
Tensor meta() const {
return to(options().device(c10::DeviceType::Meta), /*non_blocking*/ false, /*copy*/ false);
}
// ~~~~~ Autograd API ~~~~~
/// \fn bool is_leaf() const;
///
/// All Tensors that have `requires_grad()` which is ``false`` will be leaf Tensors by convention.
///
/// For Tensors that have `requires_grad()` which is ``true``, they will be leaf Tensors if they were
/// created by the user. This means that they are not the result of an operation and so
/// `grad_fn()` is `nullptr`.
///
/// Only leaf Tensors will have their `grad()` populated during a call to `backward()`.
/// To get `grad()` populated for non-leaf Tensors, you can use `retain_grad()`.
///
/// Example:
/// @code
/// auto a = torch::rand(10, torch::requires_grad());
/// std::cout << a.is_leaf() << std::endl; // prints `true`
///
/// auto b = torch::rand(10, torch::requires_grad()).to(torch::kCUDA);
/// std::cout << b.is_leaf() << std::endl; // prints `false`
/// // b was created by the operation that cast a cpu Tensor into a cuda Tensor
///
/// auto c = torch::rand(10, torch::requires_grad()) + 2;
/// std::cout << c.is_leaf() << std::endl; // prints `false`
/// // c was created by the addition operation
///
/// auto d = torch::rand(10).cuda();
/// std::cout << d.is_leaf() << std::endl; // prints `true`
/// // d does not require gradients and so has no operation creating it (that is tracked by the autograd engine)
///
/// auto e = torch::rand(10).cuda().requires_grad_();
/// std::cout << e.is_leaf() << std::endl; // prints `true`
/// // e requires gradients and has no operations creating it
///
/// auto f = torch::rand(10, torch::device(torch::kCUDA).requires_grad(true));
/// std::cout << f.is_leaf() << std::endl; // prints `true`
/// // f requires grad, has no operation creating it
/// @endcode
/// \fn void backward(const Tensor & gradient={}, std::optional<bool> retain_graph=std::nullopt, bool create_graph=false, std::optional<TensorList> inputs=std::nullopt) const;
///
/// Computes the gradient of current tensor with respect to graph leaves.
///
/// The graph is differentiated using the chain rule. If the tensor is
/// non-scalar (i.e. its data has more than one element) and requires
/// gradient, the function additionally requires specifying ``gradient``.
/// It should be a tensor of matching type and location, that contains
/// the gradient of the differentiated function w.r.t. this Tensor.
///
/// This function accumulates gradients in the leaves - you might need to
/// zero them before calling it.
///
/// \param gradient Gradient w.r.t. the
/// tensor. If it is a tensor, it will be automatically converted
/// to a Tensor that does not require grad unless ``create_graph`` is True.
/// None values can be specified for scalar Tensors or ones that
/// don't require grad. If a None value would be acceptable then
/// this argument is optional.
/// \param retain_graph If ``false``, the graph used to compute
/// the grads will be freed. Note that in nearly all cases setting
/// this option to True is not needed and often can be worked around
/// in a much more efficient way. Defaults to the value of
/// ``create_graph``.
/// \param create_graph If ``true``, graph of the derivative will
/// be constructed, allowing to compute higher order derivative
/// products. Defaults to ``false``.
/// \param inputs Inputs w.r.t. which the gradient will be accumulated into
/// ``at::Tensor::grad``. All other Tensors will be ignored. If not
/// provided, the gradient is accumulated into all the leaf Tensors
/// that were used to compute the current tensor.
/// When inputs are provided and a given input is not a leaf,
/// the current implementation will call its grad_fn (even though it is not strictly needed to get the gradients).
/// It is an implementation detail on which the user should not rely.
/// See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.
void backward(const Tensor & gradient={}, std::optional<bool> retain_graph=std::nullopt, bool create_graph=false, std::optional<TensorList> inputs=std::nullopt) const {
// NB: Adding this wrapper to _backward here because we'd like our
// 'backwards' api to accept the 'inputs' argument optionally. Since code gen
// currently does not support optional of TensorList our approach is to replace
// backward in native_functions.yaml with _backward and call it here instead.
if (inputs.has_value()) {
TORCH_CHECK(inputs.value().size() > 0, "'inputs' argument to backward cannot be empty")
this->_backward(inputs.value(), gradient, retain_graph, create_graph);
} else {
this->_backward({}, gradient, retain_graph, create_graph);
}
}
/// \fn Tensor detach() const;
///
/// Returns a new Tensor, detached from the current graph.
/// The result will never require gradient.
/// \fn Tensor & detach_() const;
///
/// Detaches the Tensor from the graph that created it, making it a leaf.
/// Views cannot be detached in-place.
/// \fn void retain_grad() const;
///
/// Enables this Tensor to have its :attr:`grad` populated during
/// :func:`backward`. This is a no-op for leaf tensors.
/// \fn bool retains_grad() const;
///
/// Is ``true`` if this Tensor is non-leaf and its :attr:`grad` is enabled to be
/// populated during :func:`backward`, ``false`` otherwise.
const Tensor& set_requires_grad(bool requires_grad) const {
TensorBase::set_requires_grad(requires_grad);
return *this;
}
/// Return a mutable reference to the gradient. This is conventionally
/// used as `t.grad() = x` to set a gradient to a completely new tensor.
/// Note that this function works with a non-const Tensor and is not
/// thread safe.
Tensor& mutable_grad() const {
return impl_->mutable_grad();
}
/// This function returns an undefined tensor by default and returns a defined tensor
/// the first time a call to `backward()` computes gradients for this Tensor.
/// The attribute will then contain the gradients computed and future calls
/// to `backward()` will accumulate (add) gradients into it.
const Tensor& grad() const {
const Tensor& maybe_grad = impl_->grad();
if (!is_leaf() && !retains_grad() && !maybe_grad.defined()) {
TORCH_WARN(
"The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad "
"attribute won't be populated during autograd.backward(). If you indeed want the .grad "
"field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. "
"If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor "
"instead. See github.com/pytorch/pytorch/pull/30531 for more informations.");
}
return maybe_grad;
}
// The Forward AD API functions below are low level and are not to be used by end
// users who should use the API provided in torch/csrc/autograd.h
/// This function returns the forward gradient for this Tensor at the given level.
const Tensor& _fw_grad(uint64_t level) const {
return impl_->_fw_grad(level, *this);
}
/// This function can be used to set the value of the forward grad.
/// Note that the given new_grad might not be used directly if it has different
/// metadata (size/stride/storage offset) compared to this Tensor. In that case,
/// new_grad content will be copied into a new Tensor
void _set_fw_grad(const TensorBase& new_grad, uint64_t level, bool is_inplace_op) const {
impl_->_set_fw_grad(new_grad, *this, level, is_inplace_op);
}
// STOP. Thinking of adding a method here, which only makes use
// of other ATen methods? Define it in native_functions.yaml.
//example
//Tensor * add(Tensor & b);
${tensor_method_declarations}
// Special C++ only overloads for std()-like functions (See gh-40287)
// These are needed because int -> bool conversion takes precedence over int -> IntArrayRef
// So, for example std(0) would select the std(unbiased=False) overload
Tensor var(int dim) const {
return var(IntArrayRef{dim});
}
Tensor std(int dim) const {
return std(IntArrayRef{dim});
}
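// Example (sketch): with the overloads above, `t.std(0)` picks the dim-wise
// reduction instead of the `std(/*unbiased=*/false)` overload:
//   auto t = torch::rand({4, 5});
//   auto per_column = t.std(0);  // equivalent to t.std(IntArrayRef{0})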
// We changed .dtype() to return a TypeMeta in #12766. Ideally, we want the
// at::kDouble and its friends to be TypeMeta's, but that hasn't happened yet.
// Until that happens, we keep these methods to maintain BC for C++ usage like
// `x.to(y.dtype)`.
// TODO: remove the following two overloads after at::kDouble and its friends are TypeMeta's.
inline Tensor to(caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
return this->to(/*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
}
inline Tensor to(Device device, caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
return this->to(device, /*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
}
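// Example (sketch for the BC overloads above): both calls are equivalent,
// assuming `x` and `y` are Tensors:
//   auto z1 = x.to(y.dtype());                        // caffe2::TypeMeta overload
//   auto z2 = x.to(typeMetaToScalarType(y.dtype()));  // ScalarType overload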
template <typename F, typename... Args>
decltype(auto) m(F func, Args&&... params) const {
return func(*this, std::forward<Args>(params)...);
}
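// Example (sketch): `m` forwards *this as the first argument of a callable,
// which lets free functions or lambdas be chained fluently:
//   auto y = x.m([](const at::Tensor& t) { return t.relu(); });
//   // roughly equivalent to: auto y = at::relu(x);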
/// NOTE: This is similar to the legacy `.data()` function on `Variable`, and is intended
/// to be used from functions that need to access the `Variable`'s equivalent `Tensor`
/// (i.e. `Tensor` that shares the same storage and tensor metadata with the `Variable`).
///
/// One notable difference with the legacy `.data()` function is that changes to the
/// returned `Tensor`'s tensor metadata (e.g. sizes / strides / storage / storage_offset)
/// will not update the original `Variable`, due to the fact that this function
/// shallow-copies the `Variable`'s underlying TensorImpl.
at::Tensor tensor_data() const {
return TensorBase::tensor_data();
}
/// NOTE: `var.variable_data()` in C++ has the same semantics as `tensor.data`
/// in Python, which creates a new `Variable` that shares the same storage and
/// tensor metadata with the original `Variable`, but with a completely new
/// autograd history.
///
/// NOTE: If we change the tensor metadata (e.g. sizes / strides /
/// storage / storage_offset) of a variable created from `var.variable_data()`, those
/// changes will not update the original variable `var`. In `.variable_data()`, we set
/// `allow_tensor_metadata_change_` to false to make such changes explicitly illegal,
/// in order to prevent users from changing metadata of `var.variable_data()`
/// and expecting the original variable `var` to also be updated.
at::Tensor variable_data() const {
return TensorBase::variable_data();
}
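// Example (illustrative sketch contrasting variable_data() and tensor_data()):
//   auto var = torch::ones({2}, torch::requires_grad());
//   auto v1 = var.variable_data();  // shares storage, fresh autograd history,
//                                   // metadata changes are disallowed
//   auto v2 = var.tensor_data();    // shares storage, metadata changes do not
//                                   // propagate back to `var`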
// Hooks
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
template <typename T>
using hook_return_void_t = std::enable_if_t<std::is_void<typename std::invoke_result_t<T&, Tensor>>::value, unsigned>;
template <typename T>
using hook_return_var_t = std::enable_if_t<std::is_same<typename std::invoke_result_t<T&, Tensor>, Tensor>::value, unsigned>;
/// Registers a backward hook.
///
/// The hook will be called every time a gradient with respect to the Tensor is computed.
/// The hook should have one of the following signatures:
/// ```
/// hook(Tensor grad) -> Tensor
/// ```
/// ```
/// hook(Tensor grad) -> void
/// ```
/// The hook should not modify its argument, but it can optionally return a new gradient
/// which will be used in place of `grad`.
///
/// This function returns the index of the hook in the list, which can be used to remove the hook.
///
/// Example:
/// @code
/// auto v = torch::tensor({0., 0., 0.}, torch::requires_grad());
/// auto h = v.register_hook([](torch::Tensor grad){ return grad * 2; }); // double the gradient
/// v.backward(torch::tensor({1., 2., 3.}));
/// // This prints:
/// // ```
/// // 2
/// // 4
/// // 6
/// // [ CPUFloatType{3} ]
/// // ```
/// std::cout << v.grad() << std::endl;
/// v.remove_hook(h); // removes the hook
/// @endcode
template <typename T>
hook_return_void_t<T> register_hook(T&& hook) const;
template <typename T>
hook_return_var_t<T> register_hook(T&& hook) const;
// Variable methods
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tensor data() const {
return TensorBase::data();
}
void _backward(TensorList inputs, const std::optional<Tensor>& gradient, std::optional<bool> keep_graph, bool create_graph) const;
const Tensor& requires_grad_(bool _requires_grad=true) const {
TensorBase::requires_grad_(_requires_grad);
return *this;
}
};
namespace detail {
// Helper creator for the Tensor class which doesn't require the user to pass
// in an intrusive_ptr; instead it converts the arguments passed into the
// requested intrusive_ptr type.
template <typename T, typename... Args>
Tensor make_tensor(Args&&... args) {
return Tensor(c10::make_intrusive<T>(std::forward<Args>(args)...));
}
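// Example (hypothetical sketch; `MyTensorImpl` stands in for some TensorImpl
// subclass and is not defined in ATen):
//   at::Tensor t = at::detail::make_tensor<MyTensorImpl>(/* MyTensorImpl ctor args */);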
} // namespace detail
} // namespace at
namespace at {
${tensor_method_definitions}
} // namespace at
namespace c10 {
template <>
struct MaybeOwnedTraits<at::Tensor> {
using owned_type = at::Tensor;
using borrow_type = at::Tensor;
static borrow_type createBorrow(const owned_type& from) {
// NOTE: this can be implemented without the special
// unsafe_borrow_t Tensor constructor as
//
// return borrow_type(c10::intrusive_ptr<at::TensorImpl, at::UndefinedTensorImpl>::reclaim(from.unsafeGetTensorImpl()));
//
// but that hurts inlining due to the nullptr check in the
// Tensor(c10::intrusive_ptr<...>) constructor. We already know
// that from.impl_ isn't null because from is a valid Tensor, so
// we needn't do the check again. (using __builtin_assume can
// avoid this, but wouldn't be portable to MSVC.)
return borrow_type(borrow_type::unsafe_borrow_t{}, from);
}
static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) {
lhs.unsafeReleaseTensorImpl();
// See above note: this can be implemented with public API
// similarly to createBorrow(), but that would hurt inlining.
lhs = borrow_type(borrow_type::unsafe_borrow_t{}, rhs);
}
static void destroyBorrow(borrow_type& toDestroy) {
toDestroy.unsafeReleaseTensorImpl(); // "leak" it, but it was already +0.
}
static const owned_type& referenceFromBorrow(const borrow_type& borrow) {
return borrow;
}
static const owned_type* pointerFromBorrow(const borrow_type& borrow) {
return &borrow;
}
static bool debugBorrowIsValid(const borrow_type& /*borrow*/) {
return true;
}
};
template <>
struct ExclusivelyOwnedTraits<at::Tensor> {
using repr_type = at::Tensor;
using pointer_type = at::Tensor*;
using const_pointer_type = const at::Tensor*;
static repr_type nullRepr() {
return at::Tensor();
}
template <class... Args>
static repr_type createInPlace(Args&&... args) {
return at::Tensor(std::forward<Args>(args)...);
}
static repr_type moveToRepr(at::Tensor&& x) {
return std::move(x);
}
static void destroyOwned(at::Tensor& x) {
return ExclusivelyOwnedTraits<at::TensorBase>::destroyOwned(x);
}
static at::Tensor take(at::Tensor& x) {
return std::move(x);
}
static pointer_type getImpl(repr_type& x) {
return &x;
}
static const_pointer_type getImpl(const repr_type& x) {
return &x;
}
};
} // namespace c10
namespace at {
inline c10::MaybeOwned<Tensor> borrow_from_optional_tensor(
const std::optional<Tensor>& opt) {
return opt.has_value()
? c10::MaybeOwned<Tensor>::borrowed(*opt)
: c10::MaybeOwned<Tensor>::owned(std::in_place);
}
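// Example (sketch of the intended call pattern inside an op implementation;
// `my_op_impl` and `maybe_weight` are illustrative names):
//   void my_op_impl(const at::Tensor& self, const std::optional<at::Tensor>& maybe_weight) {
//     c10::MaybeOwned<at::Tensor> weight = at::borrow_from_optional_tensor(maybe_weight);
//     // Borrowing avoids a refcount bump when maybe_weight already holds a Tensor.
//     if (weight->defined()) { /* use *weight */ }
//   }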
inline c10::MaybeOwned<Tensor> Tensor::expect_contiguous(MemoryFormat memory_format) const & {
if (is_contiguous(memory_format)) {
return c10::MaybeOwned<Tensor>::borrowed(*this);
} else {
return c10::MaybeOwned<Tensor>::owned(__dispatch_contiguous(memory_format));
}
}
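// Example (sketch; assumes `self` is a float tensor): expect_contiguous avoids
// a refcount bump and a copy when the input is already contiguous:
//   c10::MaybeOwned<at::Tensor> c = self.expect_contiguous();
//   const float* data = c->const_data_ptr<float>();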
} // namespace at
View File
@ -0,0 +1,61 @@
#include <c10/core/Scalar.h>
#include <ATen/core/TensorBody.h>
#include <c10/util/string_view.h>
namespace at {
namespace {
// Verifies the requested type is the same as the Tensor's type.
void check_type(const TensorBase& tensor, ScalarType type, c10::string_view type_name) {
TORCH_CHECK(
tensor.scalar_type() == type
|| (isQIntType(tensor.scalar_type())
&& toUnderlying(tensor.scalar_type()) == type),
"expected scalar type ", type_name, " but found ", tensor.scalar_type());
}
} // namespace
#define DEFINE_CAST(T, name) \
template <> \
TORCH_API const T* TensorBase::const_data_ptr() const { \
check_type(*this, ScalarType::name, #name); \
return this->unsafeGetTensorImpl()->data_ptr_impl<T>(); \
} \
\
template <> \
TORCH_API const T* TensorBase::const_data_ptr<const T>() const { \
check_type(*this, ScalarType::name, #name); \
return this->unsafeGetTensorImpl()->data_ptr_impl<std::remove_const_t<T>>(); \
} \
\
template <> \
TORCH_API T* TensorBase::mutable_data_ptr() const { \
check_type(*this, ScalarType::name, #name); \
return this->unsafeGetTensorImpl()->mutable_data_ptr_impl<T>(); \
} \
\
template <> \
TORCH_API T* TensorBase::data_ptr() const { \
return mutable_data_ptr<T>(); \
}
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_CAST)
AT_FORALL_QINT_TYPES(DEFINE_CAST)
DEFINE_CAST(uint16_t, UInt16)
DEFINE_CAST(uint32_t, UInt32)
DEFINE_CAST(uint64_t, UInt64)
#undef DEFINE_CAST
#define DEFINE_ITEM(T, name) \
template <> \
TORCH_API T Tensor::item() const { \
return item().to##name(); \
}
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_ITEM)
#undef DEFINE_ITEM
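// Example (usage sketch for the specializations generated above):
//   at::Tensor t = at::ones({1}, at::kFloat);
//   float v = t.item<float>();                   // DEFINE_ITEM specialization
//   const float* p = t.const_data_ptr<float>();  // DEFINE_CAST specialization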
} // namespace at
View File
@ -0,0 +1,19 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/native/DispatchStub.h>
#include <ATen/TensorIterator.h>
#include <ATen/TensorMeta.h>
namespace at {
// NB: this is explicitly copied here (via codegen) rather than
// included via NativeFunctions.h to avoid recompiling this file when
// NativeFunctions.h changes
namespace meta {
${meta_declaration}
}
namespace native {
${native_declaration}
${native_definitions}
}} // namespace at::native
View File
@ -0,0 +1,14 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/native/ufunc/${name}.h>
#include <ATen/native/DispatchStub.h>
#include <ATen/TensorIterator.h>
#include <ATen/native/cpu/Loops.h>
#include <ATen/cpu/vec/vec.h>
#include <ATen/Dispatch.h>
#include <c10/core/Scalar.h>
namespace at {
namespace native {
${native_definitions}
}} // namespace at::native
View File
@ -0,0 +1,21 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/native/ufunc/${name}.h>
#include <ATen/Dispatch.h>
#include <ATen/native/DispatchStub.h>
#include <c10/core/Scalar.h>
${cuda_headers}
namespace at {
// NB: this is explicitly copied here (via codegen) rather than
// included via NativeFunctions.h to avoid recompiling this file when
// NativeFunctions.h changes
namespace meta {
${meta_declaration}
}
namespace native {
${native_declaration}
${native_definitions}
}} // namespace at::native
View File
@ -0,0 +1,35 @@
#include <ATen/UnboxingFunctions.h>
#include <ATen/Functions.h>
#include <ATen/Tensor.h>
#include <ATen/core/functional.h>
#include <ATen/core/interned_strings.h>
#include <ATen/core/ivalue.h>
#include <ATen/core/stack.h>
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstring>
#include <sstream>
#include <stdexcept>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
namespace at {
namespace unboxing {
using ::c10::fmap;
using ::c10::filter;
using torch::jit::peek;
using torch::jit::drop;
using torch::jit::pack;
using torch::jit::pop;
// Generated function definitions
${definitions}
} // namespace unboxing
} // namespace at
View File
@ -0,0 +1,32 @@
// ${generated_comment}
// Generated by tools/jit/gen_unboxing.py. This file declares code-generated boxed C++ functions for operators,
// based on native_functions.yaml (or a similar yaml file with the same syntax). The definition of such a boxed
// function pops IValues from the stack and then converts them into the correct C++ types based on the given schema. This
// unboxing logic is an alternative to template-based metaprogramming unboxing.
#pragma once
#include <ATen/ATen.h>
namespace at {
namespace unboxing {
namespace {
template<typename T, size_t N>
std::array<T, N> as_array(const c10::List<c10::IValue>& list) {
std::array<T, N> res;
AT_ASSERT(list.size() == N);
std::vector<T> vec;
for (c10::IValue elem : list) {
vec.push_back(elem.to<T>());
}
std::copy(vec.begin(), vec.end(), res.begin());
return res;
}
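// Example (sketch): given a c10::List<c10::IValue> holding two ints that was
// popped from the stack (here called `generic_list`, an illustrative name),
// the helper produces a fixed-size array:
//   std::array<int64_t, 2> arr = as_array<int64_t, 2>(generic_list);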
} // namespace <anonymous>
using Stack = std::vector<c10::IValue>;
// Generated function declaration
${declarations}
} // namespace unboxing
} // namespace at
View File
@ -0,0 +1,22 @@
#pragma once
// ${generated_comment}
#if defined(TORCH_ASSERT_NO_OPERATORS) || defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS)
#error This change adds a dependency on native_functions.yaml, \
meaning the file will need to be re-compiled every time an operator \
is changed or added. Consider if including <ATen/core/symbol.h> for \
the c10::Symbol class would be sufficient, or if your change would be \
better placed in another file.
#endif
// ATen symbols correspond exactly to operators defined in ATen. Every
// symbol here corresponds exactly to an ATen operation defined in
// native_functions.yaml; attributes are in one-to-one correspondence
// with their ATen name.
#define FORALL_ATEN_BASE_SYMBOLS(_) \
${aten_symbols}
#define FORALL_ATTR_BASE_SYMBOLS(_) \
${attr_symbols}
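// Example (sketch of how an X-macro like the above is typically consumed;
// assumes each generated entry invokes `_` with a namespace and a name, e.g.
// `_(aten, add)`; `COUNT_ONE` is an illustrative macro, not part of this header):
//   #define COUNT_ONE(ns, s) +1
//   constexpr size_t kNumAtenBaseSymbols = 0 FORALL_ATEN_BASE_SYMBOLS(COUNT_ONE);
//   #undef COUNT_ONE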
View File
@ -0,0 +1,10 @@
#pragma once
// ${generated_comment}
namespace at {
// Enum of valid tags obtained from the entries in tags.yaml
enum class Tag {
${enum_of_valid_tags}
};
} // namespace at