I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,23 @@
#pragma once
#include <ATen/core/CachingHostAllocator.h>
#include <ATen/xpu/XPUEvent.h>
#include <c10/core/Allocator.h>
#include <c10/xpu/XPUStream.h>
namespace at::xpu {
// Returns the allocator that HostAlloc() below allocates from. Declared here
// only; the definition lives outside this header.
TORCH_XPU_API c10::Allocator* getCachingHostAllocator();
// NOTE(review): presumably associates the host block `ptr` (with allocator
// context `ctx`) with pending work on `stream`; the meaning of the bool
// result is not visible from this header -- confirm against the definition.
TORCH_XPU_API bool CachingHostAllocator_recordEvent(
void* ptr,
void* ctx,
c10::xpu::XPUStream stream);
// NOTE(review): presumably releases the host blocks held in the cache --
// confirm against the definition.
TORCH_XPU_API void CachingHostAllocator_emptyCache();
// Allocates `size` bytes through the allocator returned by
// getCachingHostAllocator() and hands back the resulting DataPtr.
inline TORCH_XPU_API at::DataPtr HostAlloc(size_t size) {
  c10::Allocator* const host_allocator = getCachingHostAllocator();
  return host_allocator->allocate(size);
}
} // namespace at::xpu

View File

@ -0,0 +1,11 @@
#pragma once
#include <ATen/xpu/CachingHostAllocator.h>
#include <c10/core/Allocator.h>
namespace at::xpu {
// The pinned-memory allocator for XPU is the caching host allocator; this is
// a forwarding alias for getCachingHostAllocator().
inline TORCH_XPU_API at::Allocator* getPinnedMemoryAllocator() {
  at::Allocator* pinned_allocator = getCachingHostAllocator();
  return pinned_allocator;
}
} // namespace at::xpu

View File

@ -0,0 +1,20 @@
#pragma once
#include <ATen/Context.h>
#include <c10/xpu/XPUFunctions.h>
#include <c10/xpu/XPUStream.h>
namespace at::xpu {
// XPU counts as available when c10::xpu::device_count() reports at least one
// device.
inline bool is_available() {
  const auto num_devices = c10::xpu::device_count();
  return num_devices > 0;
}
// Declarations only; the definitions live outside this header.
// NOTE(review): presumably returns the properties of the currently selected
// XPU device; ownership/lifetime of the returned pointer is not visible
// here -- confirm against the definition.
TORCH_XPU_API DeviceProp* getCurrentDeviceProperties();
// NOTE(review): presumably the same lookup for an explicit `device` index --
// confirm.
TORCH_XPU_API DeviceProp* getDeviceProperties(DeviceIndex device);
// Maps a device index to an int32_t "global index".
// NOTE(review): what the global index enumerates is not visible here --
// confirm against the definition.
TORCH_XPU_API int32_t getGlobalIdxFromDevice(DeviceIndex device);
} // namespace at::xpu

View File

@ -0,0 +1,13 @@
#pragma once
#include <ATen/Context.h>
#include <c10/xpu/XPUFunctions.h>
namespace at::xpu {
// Builds the XPU Device whose index is the one that
// c10::xpu::get_device_idx_from_pointer() reports for `ptr`.
inline Device getDeviceFromPtr(void* ptr) {
  const auto owning_index = c10::xpu::get_device_idx_from_pointer(ptr);
  return Device{c10::DeviceType::XPU, owning_index};
}
} // namespace at::xpu

View File

@ -0,0 +1,166 @@
#pragma once
#include <ATen/xpu/XPUContext.h>
#include <optional>
namespace at::xpu {
/*
* XPUEvent is a movable, non-copyable wrapper around a SYCL event. An XPUEvent
* is constructed lazily when first recorded. It has a device, and this device
* is acquired from the first recording stream. Later streams that record the
* event must match the same device.
*
* Currently, XPUEvent does NOT support exporting an inter-process event from
* another process via inter-process communication (IPC). This means that
* inter-process communication for event handles between different processes is
* not available. This could impact some applications that rely on cross-process
* synchronization and communication.
*/
struct TORCH_XPU_API XPUEvent {
// Constructors
XPUEvent(bool enable_timing = false) noexcept
: enable_timing_{enable_timing} {}
~XPUEvent() {
if (isCreated()) {
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
if (C10_UNLIKELY(interp)) {
(*interp)->trace_gpu_event_deletion(
at::kXPU, reinterpret_cast<uintptr_t>(event_.get()));
}
}
}
XPUEvent(const XPUEvent&) = delete;
XPUEvent& operator=(const XPUEvent&) = delete;
XPUEvent(XPUEvent&& other) = default;
XPUEvent& operator=(XPUEvent&& other) = default;
operator sycl::event&() const {
return event();
}
std::optional<at::Device> device() const {
if (isCreated()) {
return at::Device(at::kXPU, device_index_);
} else {
return std::nullopt;
}
}
inline bool isCreated() const {
return (event_.get() != nullptr);
}
DeviceIndex device_index() const {
return device_index_;
}
sycl::event& event() const {
return *event_;
}
bool query() const {
using namespace sycl::info;
if (!isCreated()) {
return true;
}
return event().get_info<event::command_execution_status>() ==
event_command_status::complete;
}
void record() {
record(getCurrentXPUStream());
}
void recordOnce(const XPUStream& stream) {
if (!isCreated()) {
record(stream);
}
}
void record(const XPUStream& stream) {
if (!isCreated()) {
device_index_ = stream.device_index();
event_ = std::make_unique<sycl::event>(
stream.queue().ext_oneapi_submit_barrier());
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
if (C10_UNLIKELY(interp)) {
(*interp)->trace_gpu_event_creation(
at::kXPU, reinterpret_cast<uintptr_t>(event_.get()));
}
} else {
TORCH_CHECK(
device_index_ == stream.device_index(),
"Event device ",
device_index_,
" does not match recording stream's device ",
stream.device_index(),
".");
event_.reset();
event_ = std::make_unique<sycl::event>(
stream.queue().ext_oneapi_submit_barrier());
}
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
if (C10_UNLIKELY(interp)) {
(*interp)->trace_gpu_event_record(
at::kXPU,
reinterpret_cast<uintptr_t>(event_.get()),
reinterpret_cast<uintptr_t>(&stream.queue()));
}
}
void block(const XPUStream& stream) {
if (isCreated()) {
std::vector<sycl::event> event_list{event()};
// Make this stream wait until event_ is completed.
stream.queue().ext_oneapi_submit_barrier(event_list);
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
if (C10_UNLIKELY(interp)) {
(*interp)->trace_gpu_event_wait(
at::kXPU,
reinterpret_cast<uintptr_t>(event_.get()),
reinterpret_cast<uintptr_t>(&stream.queue()));
}
}
}
float elapsed_time(const XPUEvent& other) const {
TORCH_CHECK(
isCreated() && other.isCreated(),
"Both events must be recorded before calculating elapsed time.");
TORCH_CHECK(
query() && other.query(),
"Both events must be completed before calculating elapsed time.");
TORCH_CHECK(
enable_timing_ && other.enable_timing_,
"Both events must be created with argument 'enable_timing=True'.");
// TODO: provides the ability to time the execution of commands in a SYCL
// queue without enabling profiling on the entire queue
TORCH_CHECK_NOT_IMPLEMENTED(
false, "elapsed_time is not supported by XPUEvent.");
}
void synchronize() const {
if (isCreated()) {
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
if (C10_UNLIKELY(interp)) {
(*interp)->trace_gpu_event_synchronization(
at::kXPU, reinterpret_cast<uintptr_t>(event_.get()));
}
event().wait_and_throw();
}
}
private:
bool enable_timing_ = false;
DeviceIndex device_index_ = -1;
// Only need to track the last event, as events in an in-order queue are
// executed sequentially.
std::unique_ptr<sycl::event> event_;
};
} // namespace at::xpu

View File

@ -0,0 +1,39 @@
#pragma once
#include <ATen/core/Generator.h>
namespace at {
// GeneratorImpl subclass for the XPU backend. Only declarations are visible
// in this header; the member functions are defined elsewhere.
struct TORCH_XPU_API XPUGeneratorImpl : public GeneratorImpl {
// Constructors
// NOTE(review): device_index defaults to -1; presumably that selects the
// current device -- confirm against the definition.
XPUGeneratorImpl(DeviceIndex device_index = -1);
~XPUGeneratorImpl() override = default;
// XPUGeneratorImpl methods
// Non-virtual clone that returns the concrete type.
// NOTE(review): presumably built on clone_impl() below -- confirm.
std::shared_ptr<XPUGeneratorImpl> clone() const;
// Overrides of the GeneratorImpl seed/offset/state interface.
void set_current_seed(uint64_t seed) override;
void set_offset(uint64_t offset) override;
uint64_t get_offset() const override;
uint64_t current_seed() const override;
uint64_t seed() override;
void set_state(const c10::TensorImpl& new_state) override;
c10::intrusive_ptr<c10::TensorImpl> get_state() const override;
// XPU-specific (non-virtual) accessors for the Philox offset.
void set_philox_offset_per_thread(uint64_t offset);
uint64_t philox_offset_per_thread() const;
// NOTE(review): presumably returns (seed, offset) and then advances the
// offset by `increment` -- confirm against the definition.
std::pair<uint64_t, uint64_t> philox_engine_inputs(uint64_t increment);
static c10::DeviceType device_type();
private:
// Override of the base-class clone hook.
XPUGeneratorImpl* clone_impl() const override;
// Seed, initialised to default_rng_seed_val.
uint64_t seed_ = default_rng_seed_val;
// Philox offset, initialised to 0.
uint64_t philox_offset_per_thread_ = 0;
};
namespace xpu::detail {
// Declarations only; both default `device` to -1.
// NOTE(review): presumably -1 selects the current device -- confirm against
// the definitions.
// Returns a reference to a default XPU generator.
TORCH_XPU_API const Generator& getDefaultXPUGenerator(DeviceIndex device = -1);
// Returns a Generator by value.
// NOTE(review): presumably a freshly created one, per the name -- confirm.
TORCH_XPU_API Generator createXPUGenerator(DeviceIndex device = -1);
} // namespace xpu::detail
} // namespace at

View File

@ -0,0 +1,26 @@
#pragma once
#include <ATen/detail/XPUHooksInterface.h>
namespace at::xpu::detail {
// The real implementation of XPUHooksInterface
// Concrete XPUHooksInterface. Every member function below is an override
// that is only declared here; the definitions live outside this header.
struct XPUHooks : public at::XPUHooksInterface {
// The XPUHooksArgs argument is accepted but unused.
XPUHooks(at::XPUHooksArgs) {}
void initXPU() const override;
bool hasXPU() const override;
std::string showConfig() const override;
int32_t getGlobalIdxFromDevice(const at::Device& device) const override;
// NOTE(review): default arguments on virtual functions are bound by the
// static type of the call expression; keep these in sync with the defaults
// declared on XPUHooksInterface -- confirm they match.
Generator getXPUGenerator(DeviceIndex device_index = -1) const override;
const Generator& getDefaultXPUGenerator(
DeviceIndex device_index = -1) const override;
Device getDeviceFromPtr(void* data) const override;
c10::DeviceIndex getNumGPUs() const override;
DeviceIndex current_device() const override;
void deviceSynchronize(DeviceIndex device_index) const override;
Allocator* getPinnedMemoryAllocator() const override;
bool isPinnedPtr(const void* data) const override;
bool hasPrimaryContext(DeviceIndex device_index) const override;
};
} // namespace at::xpu::detail