I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File: ATen/detail/AcceleratorHooksInterface.h

@@ -0,0 +1,56 @@
#pragma once
#include <c10/core/Device.h>
#include <c10/core/Stream.h>
#include <c10/core/Allocator.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
namespace at {
// AcceleratorHooksInterface is a shared interface provided by all
// accelerators to allow generic code.
// This interface is hook-based as it corresponds to all the functions
// that are going to be called in a generic way from the CPU code.
struct TORCH_API AcceleratorHooksInterface {
// This should never actually be implemented, but it is used to
// squelch -Werror=non-virtual-dtor
virtual ~AcceleratorHooksInterface() = default;
// Whether the device at device_index is fully initialized or not.
virtual bool hasPrimaryContext(DeviceIndex device_index) const = 0;
virtual DeviceIndex deviceCount() const {
return 0;
}
virtual void setCurrentDevice(DeviceIndex device) const {
TORCH_CHECK(false, "Backend doesn't support setCurrentDevice()");
}
virtual DeviceIndex getCurrentDevice() const {
TORCH_CHECK(false, "Backend doesn't support getCurrentDevice()");
return -1;
}
virtual DeviceIndex exchangeDevice(DeviceIndex device) const {
TORCH_CHECK(false, "Backend doesn't support exchangeDevice()");
return -1;
}
virtual DeviceIndex maybeExchangeDevice(DeviceIndex device) const {
TORCH_CHECK(false, "Backend doesn't support maybeExchangeDevice()");
return -1;
}
virtual bool isPinnedPtr(const void* data) const {
return false;
}
virtual Allocator* getPinnedMemoryAllocator() const {
TORCH_CHECK(false, "Backend doesn't support getPinnedMemoryAllocator()");
return nullptr;
}
};
} // namespace at
C10_DIAGNOSTIC_POP()
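
Since every accelerator backend derives from this interface, generic CPU-side code can be written once against it. A minimal sketch of a hypothetical backend follows; the name FakeHooks and its trivial behavior are illustrative assumptions, not part of this commit:

#include <ATen/detail/AcceleratorHooksInterface.h>

// Hypothetical sketch only: a backend that exposes no usable devices.
struct FakeHooks final : at::AcceleratorHooksInterface {
  // Only the pure-virtual hook must be overridden; every other method keeps
  // the TORCH_CHECK-failing or zero-returning default from the base class.
  bool hasPrimaryContext(c10::DeviceIndex /*device_index*/) const override {
    return false; // no device is ever fully initialized
  }
  c10::DeviceIndex deviceCount() const override {
    return 0; // report zero devices to generic code
  }
};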

View File: ATen/detail/CUDAHooksInterface.h

@@ -0,0 +1,211 @@
#pragma once
#include <c10/core/Allocator.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
// Forward-declares at::Generator and at::cuda::NVRTC
namespace at {
struct Generator;
namespace cuda {
struct NVRTC;
} // namespace cuda
} // namespace at
// NB: Class must live in `at` due to limitations of Registry.h.
namespace at {
#ifdef _MSC_VER
constexpr const char* CUDA_HELP =
"PyTorch splits its backend into two shared libraries: a CPU library "
"and a CUDA library; this error has occurred because you are trying "
"to use some CUDA functionality, but the CUDA library has not been "
"loaded by the dynamic linker for some reason. The CUDA library MUST "
"be loaded, EVEN IF you don't directly use any symbols from the CUDA library! "
"One common culprit is a lack of -INCLUDE:?warp_size@cuda@at@@YAHXZ "
"in your link arguments; many dynamic linkers will delete dynamic library "
"dependencies if you don't depend on any of their symbols. You can check "
"if this has occurred by using link on your binary to see if there is a "
"dependency on *_cuda.dll library.";
#else
constexpr const char* CUDA_HELP =
"PyTorch splits its backend into two shared libraries: a CPU library "
"and a CUDA library; this error has occurred because you are trying "
"to use some CUDA functionality, but the CUDA library has not been "
"loaded by the dynamic linker for some reason. The CUDA library MUST "
"be loaded, EVEN IF you don't directly use any symbols from the CUDA library! "
"One common culprit is a lack of -Wl,--no-as-needed in your link arguments; many "
"dynamic linkers will delete dynamic library dependencies if you don't "
"depend on any of their symbols. You can check if this has occurred by "
"using ldd on your binary to see if there is a dependency on *_cuda.so "
"library.";
#endif
// The CUDAHooksInterface is an omnibus interface for any CUDA functionality
// which we may want to call into from CPU code (and thus must be dynamically
// dispatched, to allow for separate compilation of CUDA code). How do I
// decide if a function should live in this class? There are two tests:
//
// 1. Does the *implementation* of this function require linking against
// CUDA libraries?
//
// 2. Is this function *called* from non-CUDA ATen code?
//
// (2) should filter out many ostensible use-cases, since many times a CUDA
// function provided by ATen is only really ever used by actual CUDA code.
//
// TODO: Consider putting the stub definitions in another class, so that one
// never forgets to implement each virtual function in the real implementation
// in CUDAHooks. This probably doesn't buy us much though.
struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
// This should never actually be implemented, but it is used to
// squelch -Werror=non-virtual-dtor
~CUDAHooksInterface() override = default;
// Initialize the CUDA library state (formerly THCState)
virtual void initCUDA() const {
TORCH_CHECK(false, "Cannot initialize CUDA without ATen_cuda library. ", CUDA_HELP);
}
virtual const Generator& getDefaultCUDAGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Cannot get default CUDA generator without ATen_cuda library. ", CUDA_HELP);
}
virtual Device getDeviceFromPtr(void* /*data*/) const {
TORCH_CHECK(false, "Cannot get device of pointer on CUDA without ATen_cuda library. ", CUDA_HELP);
}
bool isPinnedPtr(const void* data) const override {
return false;
}
virtual bool hasCUDA() const {
return false;
}
virtual bool hasCUDART() const {
return false;
}
virtual bool hasMAGMA() const {
return false;
}
virtual bool hasCuDNN() const {
return false;
}
virtual bool hasCuSOLVER() const {
return false;
}
virtual bool hasCuBLASLt() const {
return false;
}
virtual bool hasROCM() const {
return false;
}
virtual const at::cuda::NVRTC& nvrtc() const {
TORCH_CHECK(false, "NVRTC requires CUDA. ", CUDA_HELP);
}
bool hasPrimaryContext(DeviceIndex device_index) const override {
TORCH_CHECK(false, "Cannot call hasPrimaryContext(", device_index, ") without ATen_cuda library. ", CUDA_HELP);
}
virtual DeviceIndex current_device() const {
return -1;
}
Allocator* getPinnedMemoryAllocator() const override {
TORCH_CHECK(false, "Pinned memory requires CUDA. ", CUDA_HELP);
}
virtual Allocator* getCUDADeviceAllocator() const {
TORCH_CHECK(false, "CUDADeviceAllocator requires CUDA. ", CUDA_HELP);
}
virtual bool compiledWithCuDNN() const {
return false;
}
virtual bool compiledWithMIOpen() const {
return false;
}
virtual bool supportsDilatedConvolutionWithCuDNN() const {
return false;
}
virtual bool supportsDepthwiseConvolutionWithCuDNN() const {
return false;
}
virtual bool supportsBFloat16ConvolutionWithCuDNNv8() const {
return false;
}
virtual long versionCuDNN() const {
TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP);
}
virtual long versionCUDART() const {
TORCH_CHECK(false, "Cannot query CUDART version without ATen_cuda library. ", CUDA_HELP);
}
virtual std::string showConfig() const {
TORCH_CHECK(false, "Cannot query detailed CUDA version without ATen_cuda library. ", CUDA_HELP);
}
virtual double batchnormMinEpsilonCuDNN() const {
TORCH_CHECK(false,
"Cannot query batchnormMinEpsilonCuDNN() without ATen_cuda library. ", CUDA_HELP);
}
virtual int64_t cuFFTGetPlanCacheMaxSize(DeviceIndex /*device_index*/) const {
TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP);
}
virtual void cuFFTSetPlanCacheMaxSize(DeviceIndex /*device_index*/, int64_t /*max_size*/) const {
TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP);
}
virtual int64_t cuFFTGetPlanCacheSize(DeviceIndex /*device_index*/) const {
TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP);
}
virtual void cuFFTClearPlanCache(DeviceIndex /*device_index*/) const {
TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP);
}
virtual int getNumGPUs() const {
return 0;
}
#ifdef USE_ROCM
virtual bool isGPUArch(DeviceIndex /*device_index*/, const std::vector<std::string>& /*archs*/) const {
TORCH_CHECK(false, "Cannot check GPU arch without ATen_cuda library. ", CUDA_HELP);
}
#endif
virtual void deviceSynchronize(DeviceIndex /*device_index*/) const {
TORCH_CHECK(false, "Cannot synchronize CUDA device without ATen_cuda library. ", CUDA_HELP);
}
};
// NB: dummy argument to suppress "ISO C++11 requires at least one argument
// for the "..." in a variadic macro"
struct TORCH_API CUDAHooksArgs {};
TORCH_DECLARE_REGISTRY(CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs);
#define REGISTER_CUDA_HOOKS(clsname) \
C10_REGISTER_CLASS(CUDAHooksRegistry, clsname, clsname)
namespace detail {
TORCH_API const CUDAHooksInterface& getCUDAHooks();
} // namespace detail
} // namespace at
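
To make the two tests above concrete: the separately compiled CUDA library provides the one real subclass and registers it, and CPU-side code reaches it only through detail::getCUDAHooks(). A hedged sketch, where MyCUDAHooks stands in for the real implementation:

#include <ATen/detail/CUDAHooksInterface.h>

namespace at {
// Hypothetical implementation compiled into the separate CUDA library.
struct MyCUDAHooks : CUDAHooksInterface {
  // The registry constructs the class from the (dummy) args struct.
  explicit MyCUDAHooks(CUDAHooksArgs) {}
  bool hasCUDA() const override {
    return true; // the real implementation would query the CUDA runtime
  }
};
// Registration makes the hooks discoverable at runtime without the CPU
// library linking against any CUDA symbol directly.
REGISTER_CUDA_HOOKS(MyCUDAHooks)
} // namespace at

// Generic CPU-side code then dispatches dynamically:
//   bool available = at::detail::getCUDAHooks().hasCUDA();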

View File: ATen/detail/FunctionTraits.h

@@ -0,0 +1,103 @@
#pragma once
#include <cstddef>
#include <tuple>
// Modified from https://stackoverflow.com/questions/7943525/is-it-possible-to-figure-out-the-parameter-type-and-return-type-of-a-lambda
// Fallback, anything with an operator()
template <typename T>
struct function_traits : public function_traits<decltype(&T::operator())> {
};
// Pointers to class members that are themselves functors.
// For example, in the following code:
// template <typename func_t>
// struct S {
// func_t f;
// };
// template <typename func_t>
// S<func_t> make_s(func_t f) {
// return S<func_t> { .f = f };
// }
//
// auto s = make_s([] (int, float) -> double { /* ... */ });
//
// function_traits<decltype(&decltype(s)::f)> traits;
template <typename ClassType, typename T>
struct function_traits<T ClassType::*> : public function_traits<T> {
};
// Const class member functions
template <typename ClassType, typename ReturnType, typename... Args>
struct function_traits<ReturnType(ClassType::*)(Args...) const> : public function_traits<ReturnType(Args...)> {
};
// Reference types
template <typename T>
struct function_traits<T&> : public function_traits<T> {};
template <typename T>
struct function_traits<T*> : public function_traits<T> {};
// Free functions
template <typename ReturnType, typename... Args>
struct function_traits<ReturnType(Args...)> {
// arity is the number of arguments.
enum { arity = sizeof...(Args) };
using ArgsTuple = std::tuple<Args...>;
using result_type = ReturnType;
template <size_t i>
struct arg
{
using type = typename std::tuple_element<i, std::tuple<Args...>>::type;
// the i-th argument is equivalent to the i-th tuple element of a tuple
// composed of those arguments.
};
};
template <typename T>
struct nullary_function_traits {
using traits = function_traits<T>;
using result_type = typename traits::result_type;
};
template <typename T>
struct unary_function_traits {
using traits = function_traits<T>;
using result_type = typename traits::result_type;
using arg1_t = typename traits::template arg<0>::type;
};
template <typename T>
struct binary_function_traits {
using traits = function_traits<T>;
using result_type = typename traits::result_type;
using arg1_t = typename traits::template arg<0>::type;
using arg2_t = typename traits::template arg<1>::type;
};
// Traits for calling with c10::guts::invoke, where member_functions have a first argument of ClassType
template <typename T>
struct invoke_traits : public function_traits<T>{
};
template <typename T>
struct invoke_traits<T&> : public invoke_traits<T>{
};
template <typename T>
struct invoke_traits<T&&> : public invoke_traits<T>{
};
template <typename ClassType, typename ReturnType, typename... Args>
struct invoke_traits<ReturnType(ClassType::*)(Args...)> :
public function_traits<ReturnType(ClassType&, Args...)> {
};
template <typename ClassType, typename ReturnType, typename... Args>
struct invoke_traits<ReturnType(ClassType::*)(Args...) const> :
public function_traits<ReturnType(const ClassType&, Args...)> {
};
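
A short compile-time sketch of what these traits recover from a callable; the names f and bin are illustrative, and the header is assumed to be includable as ATen/detail/FunctionTraits.h:

#include <ATen/detail/FunctionTraits.h>
#include <type_traits>

// The operator() fallback decomposes a lambda into result and argument types.
auto f = [](int, float) -> double { return 0.0; };
using traits = function_traits<decltype(f)>;
static_assert(traits::arity == 2, "two arguments");
static_assert(std::is_same_v<traits::result_type, double>, "returns double");
static_assert(std::is_same_v<traits::arg<0>::type, int>, "first argument is int");

// The convenience wrappers name the leading argument types directly.
using bin = binary_function_traits<decltype(f)>;
static_assert(std::is_same_v<bin::arg2_t, float>, "second argument is float");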

View File: ATen/detail/HIPHooksInterface.h

@@ -0,0 +1,79 @@
#pragma once
#include <c10/core/Allocator.h>
#include <c10/core/GeneratorImpl.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <memory>
namespace at {
class Context;
}
// NB: Class must live in `at` due to limitations of Registry.h.
namespace at {
// The HIPHooksInterface is an omnibus interface for any HIP functionality
// which we may want to call into from CPU code (and thus must be dynamically
// dispatched, to allow for separate compilation of HIP code). See
// CUDAHooksInterface for more detailed motivation.
struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
// This should never actually be implemented, but it is used to
// squelch -Werror=non-virtual-dtor
~HIPHooksInterface() override = default;
// Initialize the HIP library state
virtual void initHIP() const {
AT_ERROR("Cannot initialize HIP without ATen_hip library.");
}
virtual std::unique_ptr<c10::GeneratorImpl> initHIPGenerator(Context*) const {
AT_ERROR("Cannot initialize HIP generator without ATen_hip library.");
}
virtual bool hasHIP() const {
return false;
}
virtual c10::DeviceIndex current_device() const {
return -1;
}
bool isPinnedPtr(const void* data) const override {
return false;
}
Allocator* getPinnedMemoryAllocator() const override {
AT_ERROR("Pinned memory requires HIP.");
}
virtual void registerHIPTypes(Context*) const {
AT_ERROR("Cannot registerHIPTypes() without ATen_hip library.");
}
virtual int getNumGPUs() const {
return 0;
}
bool hasPrimaryContext(DeviceIndex device_index) const override {
AT_ERROR("Cannot check primary context without ATen_hip library.");
}
};
// NB: dummy argument to suppress "ISO C++11 requires at least one argument
// for the "..." in a variadic macro"
struct TORCH_API HIPHooksArgs {};
TORCH_DECLARE_REGISTRY(HIPHooksRegistry, HIPHooksInterface, HIPHooksArgs);
#define REGISTER_HIP_HOOKS(clsname) \
C10_REGISTER_CLASS(HIPHooksRegistry, clsname, clsname)
namespace detail {
TORCH_API const HIPHooksInterface& getHIPHooks();
} // namespace detail
} // namespace at

View File: ATen/detail/IPUHooksInterface.h

@@ -0,0 +1,35 @@
#pragma once
#include <ATen/core/Generator.h>
#include <c10/core/Allocator.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
namespace at {
struct TORCH_API IPUHooksInterface {
virtual ~IPUHooksInterface() = default;
virtual const Generator& getDefaultIPUGenerator(
DeviceIndex device_index [[maybe_unused]] = -1) const {
AT_ERROR(
"Cannot get the default IPU generator: the IPU backend is not "
"available.");
}
virtual Generator newIPUGenerator(DeviceIndex device_index [[maybe_unused]] = -1) const {
AT_ERROR(
"Cannot create a new IPU generator: the IPU backend is not available.");
}
};
struct TORCH_API IPUHooksArgs {};
TORCH_DECLARE_REGISTRY(IPUHooksRegistry, IPUHooksInterface, IPUHooksArgs);
#define REGISTER_IPU_HOOKS(clsname) \
C10_REGISTER_CLASS(IPUHooksRegistry, clsname, clsname)
namespace detail {
TORCH_API const IPUHooksInterface& getIPUHooks();
} // namespace detail
} // namespace at

View File: ATen/detail/MAIAHooksInterface.h

@@ -0,0 +1,31 @@
#pragma once
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
// NB: Class must live in `at` due to limitations of Registry.h.
namespace at {
struct TORCH_API MAIAHooksInterface {
// This should never actually be implemented, but it is used to
// squelch -Werror=non-virtual-dtor
virtual ~MAIAHooksInterface() = default;
virtual std::string showConfig() const {
TORCH_CHECK(false, "Cannot query detailed MAIA version information.");
}
};
// NB: dummy argument to suppress "ISO C++11 requires at least one argument
// for the "..." in a variadic macro"
struct TORCH_API MAIAHooksArgs {};
TORCH_DECLARE_REGISTRY(MAIAHooksRegistry, MAIAHooksInterface, MAIAHooksArgs);
#define REGISTER_MAIA_HOOKS(clsname) \
C10_REGISTER_CLASS(MAIAHooksRegistry, clsname, clsname)
namespace detail {
TORCH_API const MAIAHooksInterface& getMAIAHooks();
} // namespace detail
} // namespace at

View File: ATen/detail/MPSHooksInterface.h

@@ -0,0 +1,117 @@
// Copyright © 2022 Apple Inc.
#pragma once
#include <c10/core/Allocator.h>
#include <ATen/core/Generator.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
#include <cstddef>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
namespace at {
struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface {
// This macro raises an error if an MPSHooks function is called while the
// MPS backend is not present.
#define FAIL_MPSHOOKS_FUNC(func) \
TORCH_CHECK(false, "Cannot execute ", func, "() without MPS backend.");
~MPSHooksInterface() override = default;
// Initialize the MPS library state
virtual void initMPS() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual bool hasMPS() const {
return false;
}
virtual bool isOnMacOSorNewer(unsigned major = 13, unsigned minor = 0) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual const Generator& getDefaultMPSGenerator() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual Allocator* getMPSDeviceAllocator() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void deviceSynchronize() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void commitStream() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void* getCommandBuffer() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void* getDispatchQueue() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void emptyCache() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual size_t getCurrentAllocatedMemory() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual size_t getDriverAllocatedMemory() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual size_t getRecommendedMaxMemory() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void setMemoryFraction(double /*ratio*/) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void profilerStartTrace(const std::string& mode, bool waitUntilCompleted) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void profilerStopTrace() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual uint32_t acquireEvent(bool enable_timing) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void releaseEvent(uint32_t event_id) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void recordEvent(uint32_t event_id) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void waitForEvent(uint32_t event_id) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual void synchronizeEvent(uint32_t event_id) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual bool queryEvent(uint32_t event_id) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual double elapsedTimeOfEvents(uint32_t start_event_id, uint32_t end_event_id) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
bool hasPrimaryContext(DeviceIndex device_index) const override {
FAIL_MPSHOOKS_FUNC(__func__);
}
bool isPinnedPtr(const void* data) const override {
return false;
}
Allocator* getPinnedMemoryAllocator() const override {
FAIL_MPSHOOKS_FUNC(__func__);
}
#undef FAIL_MPSHOOKS_FUNC
};
struct TORCH_API MPSHooksArgs {};
TORCH_DECLARE_REGISTRY(MPSHooksRegistry, MPSHooksInterface, MPSHooksArgs);
#define REGISTER_MPS_HOOKS(clsname) \
C10_REGISTER_CLASS(MPSHooksRegistry, clsname, clsname)
namespace detail {
TORCH_API const MPSHooksInterface& getMPSHooks();
} // namespace detail
} // namespace at
C10_DIAGNOSTIC_POP()
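
The event hooks above follow an acquire/record/synchronize/elapsed lifecycle. A hedged usage sketch, assuming an MPS-enabled build in which detail::getMPSHooks() returns a working implementation (time_mps_region is a hypothetical helper):

#include <ATen/detail/MPSHooksInterface.h>
#include <cstdint>

// Hypothetical timing helper; on a non-MPS build every call below would
// fail with "Cannot execute ...() without MPS backend."
double time_mps_region() {
  const auto& hooks = at::detail::getMPSHooks();
  uint32_t start = hooks.acquireEvent(/*enable_timing=*/true);
  uint32_t stop = hooks.acquireEvent(/*enable_timing=*/true);
  hooks.recordEvent(start);
  // ... enqueue MPS work here ...
  hooks.recordEvent(stop);
  hooks.synchronizeEvent(stop);
  double ms = hooks.elapsedTimeOfEvents(start, stop);
  hooks.releaseEvent(start);
  hooks.releaseEvent(stop);
  return ms;
}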

View File: ATen/detail/MTIAHooksInterface.h

@@ -0,0 +1,120 @@
#pragma once
#include <c10/core/Device.h>
#include <c10/util/Exception.h>
#include <c10/core/Stream.h>
#include <c10/util/Registry.h>
#include <c10/core/Allocator.h>
#include <c10/util/python_stub.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <string>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
namespace at {
class Context;
}
namespace at {
constexpr const char* MTIA_HELP =
"The MTIA backend requires the MTIA extension for PyTorch; "
"this error has occurred because you are trying "
"to use MTIA functionality without the MTIA extension included.";
struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface {
// This macro raises an error if an MTIAHooks function is called while the
// MTIA backend is not present.
#define FAIL_MTIAHOOKS_FUNC(func) \
TORCH_CHECK(false, "Cannot execute ", func, "() without MTIA backend.");
~MTIAHooksInterface() override = default;
virtual void initMTIA() const {
// Avoid logging here: MTIA needs to initialize its devices before it knows
// how many are available. Make this a no-op if the MTIA extension is not
// dynamically loaded.
return;
}
virtual bool hasMTIA() const {
return false;
}
DeviceIndex deviceCount() const override {
return 0;
}
virtual void deviceSynchronize(c10::DeviceIndex device_index) const {
FAIL_MTIAHOOKS_FUNC(__func__);
}
virtual std::string showConfig() const {
FAIL_MTIAHOOKS_FUNC(__func__);
}
bool hasPrimaryContext(DeviceIndex device_index) const override {
return false;
}
void setCurrentDevice(DeviceIndex device) const override {
FAIL_MTIAHOOKS_FUNC(__func__);
}
DeviceIndex getCurrentDevice() const override {
FAIL_MTIAHOOKS_FUNC(__func__);
return -1;
}
DeviceIndex exchangeDevice(DeviceIndex device) const override {
FAIL_MTIAHOOKS_FUNC(__func__);
return -1;
}
DeviceIndex maybeExchangeDevice(DeviceIndex device) const override {
FAIL_MTIAHOOKS_FUNC(__func__);
return -1;
}
virtual c10::Stream getCurrentStream(DeviceIndex device) const {
FAIL_MTIAHOOKS_FUNC(__func__);
return c10::Stream::unpack3(-1, 0, c10::DeviceType::MTIA);
}
virtual c10::Stream getDefaultStream(DeviceIndex device) const {
FAIL_MTIAHOOKS_FUNC(__func__);
return c10::Stream::unpack3(-1, 0, c10::DeviceType::MTIA);
}
virtual void setCurrentStream(const c10::Stream& stream) const {
FAIL_MTIAHOOKS_FUNC(__func__);
}
bool isPinnedPtr(const void* data) const override {
return false;
}
Allocator* getPinnedMemoryAllocator() const override {
FAIL_MTIAHOOKS_FUNC(__func__);
return nullptr;
}
virtual PyObject* memoryStats(DeviceIndex device) const {
FAIL_MTIAHOOKS_FUNC(__func__);
return nullptr;
}
};
struct TORCH_API MTIAHooksArgs {};
C10_DECLARE_REGISTRY(MTIAHooksRegistry, MTIAHooksInterface, MTIAHooksArgs);
#define REGISTER_MTIA_HOOKS(clsname) \
C10_REGISTER_CLASS(MTIAHooksRegistry, clsname, clsname)
namespace detail {
TORCH_API const MTIAHooksInterface& getMTIAHooks();
TORCH_API bool isMTIAHooksBuilt();
} // namespace detail
} // namespace at
C10_DIAGNOSTIC_POP()
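
MTIA is the only interface here that exposes stream hooks. A hedged sketch of selecting the default stream, assuming the MTIA extension is dynamically loaded (use_default_mtia_stream is a hypothetical helper):

#include <ATen/detail/MTIAHooksInterface.h>

// Hypothetical usage: guard on the extension, then make the default stream
// of device 0 current. Without the extension, the stream hooks TORCH_CHECK.
void use_default_mtia_stream() {
  if (!at::detail::isMTIAHooksBuilt()) {
    return; // extension not loaded
  }
  const auto& hooks = at::detail::getMTIAHooks();
  if (!hooks.hasMTIA()) {
    return; // no devices available
  }
  c10::Stream stream = hooks.getDefaultStream(/*device=*/0);
  hooks.setCurrentStream(stream);
}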

View File: ATen/detail/PrivateUse1HooksInterface.h

@@ -0,0 +1,67 @@
#pragma once
#include <ATen/core/Generator.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <c10/core/Allocator.h>
#include <c10/core/Device.h>
#include <c10/core/Storage.h>
#include <c10/util/Exception.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
namespace at {
struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface {
~PrivateUse1HooksInterface() override = default;
virtual const at::Generator& getDefaultGenerator(
c10::DeviceIndex device_index) const {
TORCH_CHECK_NOT_IMPLEMENTED(
false,
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before calling `getDefaultGenerator`.");
}
virtual at::Device getDeviceFromPtr(void* data) const {
TORCH_CHECK_NOT_IMPLEMENTED(
false,
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before calling `getDeviceFromPtr`.");
}
bool isPinnedPtr(const void* data) const override {
return false;
}
Allocator* getPinnedMemoryAllocator() const override {
TORCH_CHECK(
false,
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before calling `getPinnedMemoryAllocator`.");
}
bool hasPrimaryContext(DeviceIndex device_index) const override {
TORCH_CHECK_NOT_IMPLEMENTED(
false,
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before calling `hasPrimaryContext`.");
}
virtual void initPrivateUse1() const {}
virtual void resizePrivateUse1Bytes(
const c10::Storage& storage,
size_t newsize) const {
TORCH_CHECK_NOT_IMPLEMENTED(
false,
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before calling `resizePrivateUse1Bytes`.");
}
};
struct TORCH_API PrivateUse1HooksArgs {};
TORCH_API void RegisterPrivateUse1HooksInterface(
at::PrivateUse1HooksInterface* hook_);
TORCH_API bool isPrivateUse1HooksRegistered();
namespace detail {
TORCH_API const at::PrivateUse1HooksInterface& getPrivateUse1Hooks();
} // namespace detail
} // namespace at
C10_DIAGNOSTIC_POP()
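
Unlike the registry-macro backends above, PrivateUse1 hooks are installed through a plain registration call. A hedged sketch for an out-of-tree backend; MyBackendHooks and install_mybackend_hooks are hypothetical names:

#include <ATen/detail/PrivateUse1HooksInterface.h>

// Hypothetical out-of-tree backend: override only what the backend supports;
// everything else keeps the "not implemented" defaults from the base class.
struct MyBackendHooks : at::PrivateUse1HooksInterface {
  void initPrivateUse1() const override {
    // the real backend would initialize its runtime here
  }
};

void install_mybackend_hooks() {
  // The registered pointer must outlive all uses; a static instance is a
  // simple way to guarantee that.
  static MyBackendHooks hooks;
  at::RegisterPrivateUse1HooksInterface(&hooks);
}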

View File: ATen/detail/XPUHooksInterface.h

@@ -0,0 +1,84 @@
#pragma once
#include <c10/core/Device.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
#include <ATen/core/Generator.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
namespace at {
struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface {
~XPUHooksInterface() override = default;
virtual void initXPU() const {
TORCH_CHECK(
false,
"Cannot initialize XPU without ATen_xpu library.");
}
virtual bool hasXPU() const {
return false;
}
virtual std::string showConfig() const {
TORCH_CHECK(
false,
"Cannot query detailed XPU version without ATen_xpu library.");
}
virtual int32_t getGlobalIdxFromDevice(const Device& device) const {
TORCH_CHECK(false, "Cannot get XPU global device index without ATen_xpu library.");
}
virtual Generator getXPUGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library.");
}
virtual const Generator& getDefaultXPUGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Cannot get default XPU generator without ATen_xpu library.");
}
virtual DeviceIndex getNumGPUs() const {
return 0;
}
virtual DeviceIndex current_device() const {
TORCH_CHECK(false, "Cannot get current device on XPU without ATen_xpu library.");
}
virtual Device getDeviceFromPtr(void* /*data*/) const {
TORCH_CHECK(false, "Cannot get device of pointer on XPU without ATen_xpu library.");
}
virtual void deviceSynchronize(DeviceIndex /*device_index*/) const {
TORCH_CHECK(false, "Cannot synchronize XPU device without ATen_xpu library.");
}
Allocator* getPinnedMemoryAllocator() const override {
TORCH_CHECK(false, "Cannot get XPU pinned memory allocator without ATen_xpu library.");
}
bool isPinnedPtr(const void* data) const override {
return false;
}
bool hasPrimaryContext(DeviceIndex device_index) const override {
TORCH_CHECK(false, "Cannot query primary context without ATen_xpu library.");
}
};
struct TORCH_API XPUHooksArgs {};
C10_DECLARE_REGISTRY(XPUHooksRegistry, XPUHooksInterface, XPUHooksArgs);
#define REGISTER_XPU_HOOKS(clsname) \
C10_REGISTER_CLASS(XPUHooksRegistry, clsname, clsname)
namespace detail {
TORCH_API const XPUHooksInterface& getXPUHooks();
} // namespace detail
} // namespace at
C10_DIAGNOSTIC_POP()