I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,504 @@
# mypy: allow-untyped-defs
r"""
This package introduces support for the XPU backend, specifically tailored for
Intel GPU optimization.
This package is lazily initialized, so you can always import it, and use
:func:`is_available()` to determine if your system supports XPU.
"""
import threading
import traceback
from functools import lru_cache
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import torch
import torch._C
from torch import device as _device
from torch._utils import _dummy_type, _LazySeedTracker
from ._utils import _get_device_index
from .streams import Event, Stream
# Flipped to True by _lazy_init() once every queued call has run.
_initialized = False
# Per-thread storage; _lazy_init() sets `is_initializing` on it while draining
# the queue so that reentrant calls on the same thread return early.
_tls = threading.local()
# Held by _lazy_init() for the whole initialization sequence.
_initialization_lock = threading.Lock()
_queued_calls: List[
    Tuple[Callable[[], None], List[str]]
] = []  # don't invoke these until initialization occurs
# Falls back to a stub returning False when torch._C has no `_xpu_isInBadFork`.
_is_in_bad_fork = getattr(torch._C, "_xpu_isInBadFork", lambda: False)
# Accepted spellings of a device argument throughout this module.
_device_t = Union[_device, str, int, None]
# Receives the seeding callbacks queued by _lazy_call() before initialization.
_lazy_seed_tracker = _LazySeedTracker()
# Starts out empty.
# NOTE(review): presumably replaced by the native backend at init time — confirm.
default_generators: Tuple[torch._C.Generator] = ()  # type: ignore[assignment]
def _is_compiled() -> bool:
r"""Return true if compile with XPU support."""
return torch._C._has_xpu
# Bind the device-property type and the device-exchange primitives once, at
# import time: the native implementations when XPU support is compiled in,
# otherwise placeholders that fail when used.
if _is_compiled():
    _XpuDeviceProperties = torch._C._XpuDeviceProperties
    _exchange_device = torch._C._xpu_exchangeDevice
    _maybe_exchange_device = torch._C._xpu_maybeExchangeDevice
else:
    # Define dummy if PyTorch was compiled without XPU
    _XpuDeviceProperties = _dummy_type("_XpuDeviceProperties")  # type: ignore[assignment, misc]

    def _exchange_device(device: int) -> int:
        # Placeholder: always raises because no XPU backend is available.
        raise NotImplementedError("PyTorch was compiled without XPU support")

    def _maybe_exchange_device(device: int) -> int:
        # Placeholder: always raises because no XPU backend is available.
        raise NotImplementedError("PyTorch was compiled without XPU support")
@lru_cache(maxsize=1)
def device_count() -> int:
    r"""Return the number of XPU devices.

    The result is memoized, so the native query runs at most once per process.

    Returns:
        int: ``0`` when PyTorch was built without XPU support, otherwise the
        value reported by ``torch._C._xpu_getDeviceCount()``.
    """
    return torch._C._xpu_getDeviceCount() if _is_compiled() else 0
def is_available() -> bool:
    r"""Return ``True`` when at least one XPU device is reported.

    Returns:
        bool: whether :func:`device_count` is greater than zero.
    """
    # NOTE(review): intended never to raise; that relies on device_count()
    # itself not raising.
    visible_devices = device_count()
    return visible_devices > 0
def is_bf16_supported():
    r"""Return whether the current XPU device supports dtype bfloat16.

    Returns:
        bool: always ``True``; no device is queried.
    """
    supported = True
    return supported
def is_initialized():
    r"""Return whether PyTorch's XPU state has been initialized.

    Returns:
        bool: ``True`` only when initialization has completed and the process
        is not flagged as being in a bad fork.
    """
    if not _initialized:
        return False
    return not _is_in_bad_fork()
def _lazy_call(callable, **kwargs):
    r"""Run ``callable`` now if XPU is initialized, otherwise queue it.

    Args:
        callable: zero-argument callable to run.
        **kwargs: ``seed_all=True`` or ``seed=True`` route the callable to the
            lazy seed tracker instead of the general queue; ``seed_all`` is
            checked first.
    """
    if is_initialized():
        callable()
        return

    # Keep only the formatted stack (a list of strings), not the traceback
    # object itself, to avoid a memory cycle.
    origin = traceback.format_stack()
    if kwargs.get("seed_all", False):
        _lazy_seed_tracker.queue_seed_all(callable, origin)
    elif kwargs.get("seed", False):
        _lazy_seed_tracker.queue_seed(callable, origin)
    else:
        _queued_calls.append((callable, origin))
def init():
    r"""Initialize PyTorch's XPU state.

    Public entry point to the lazy initialization: XPU is normally not
    initialized until it is first accessed, and this forces that step. It
    does nothing if the XPU state is already initialized.
    """
    # All of the work, including the "already initialized" check, is delegated.
    _lazy_init()
def _lazy_init():
    r"""Initialize the XPU backend once and run every queued call.

    Returns immediately when XPU is already initialized or when the current
    thread is in the middle of running the queued calls.

    Raises:
        RuntimeError: if the process is flagged as being in a bad fork.
        AssertionError: if PyTorch was not compiled with XPU support.
        Exception: if a queued call fails; the message includes the stack at
            which that call was originally queued.
    """
    global _initialized, _queued_calls
    # Fast path, taken without the lock. The second condition catches
    # reentrant calls made by queued callbacks on this same thread.
    if is_initialized() or hasattr(_tls, "is_initializing"):
        return
    with _initialization_lock:
        # This test was protected via GIL. Double-check whether XPU has
        # already been initialized.
        if is_initialized():
            return
        # Stop promptly upon encountering a bad fork error.
        if _is_in_bad_fork():
            raise RuntimeError(
                "Cannot re-initialize XPU in forked subprocess. To use XPU with "
                "multiprocessing, you must use the 'spawn' start method"
            )
        if not _is_compiled():
            raise AssertionError("Torch not compiled with XPU enabled")
        # This function inits XPU backend and detects bad fork processing.
        torch._C._xpu_init()
        # Some of the queued calls may reentrantly call _lazy_init(); We need to
        # just return without initializing in that case.
        _tls.is_initializing = True

        # Seeding calls recorded by the tracker are appended after the calls
        # that are already queued; empty entries are skipped.
        for calls in _lazy_seed_tracker.get_calls():
            if calls:
                _queued_calls.append(calls)

        try:
            for queued_call, orig_traceback in _queued_calls:
                try:
                    queued_call()
                except Exception as e:
                    # Re-raise with the stack captured when the call was queued,
                    # since the current stack only shows the initialization.
                    msg = (
                        f"XPU call failed lazily at initialization with error: {str(e)}\n\n"
                        f"XPU call was originally invoked at:\n\n{''.join(orig_traceback)}"
                    )
                    raise Exception(msg) from e  # noqa: TRY002
        finally:
            # Always clear the reentrancy marker, even when a queued call failed.
            delattr(_tls, "is_initializing")
        # Only reached when every queued call succeeded.
        _initialized = True
class _DeviceGuard:
def __init__(self, index: int):
self.idx = index
self.prev_idx = -1
def __enter__(self):
self.prev_idx = torch.xpu._exchange_device(self.idx)
def __exit__(self, type: Any, value: Any, traceback: Any):
self.idx = torch.xpu._maybe_exchange_device(self.prev_idx)
return False
class device:
    r"""Context-manager that changes the selected device.

    Args:
        device (torch.device or int or str): device index to select. It's a no-op if
            this argument is a negative integer or ``None``.
    """

    def __init__(self, device: Any):
        resolved = _get_device_index(device, optional=True)
        self.idx = resolved
        self.prev_idx = -1

    def __enter__(self):
        # Remember whatever the exchange call returns so __exit__ can hand it back.
        previous = torch.xpu._exchange_device(self.idx)
        self.prev_idx = previous

    def __exit__(self, type: Any, value: Any, traceback: Any):
        swapped_out = torch.xpu._maybe_exchange_device(self.prev_idx)
        self.idx = swapped_out
        # Never suppress an exception raised inside the ``with`` body.
        return False
class device_of(device):
    r"""Context-manager that changes the current device to that of given object.

    Both tensors and storages may be passed. If the object is not allocated
    on a XPU, index ``-1`` is used, which makes this a no-op.

    Args:
        obj (Tensor or Storage): object allocated on the selected device.
    """

    def __init__(self, obj):
        target = -1
        if obj.is_xpu:
            target = obj.get_device()
        super().__init__(target)
def set_device(device: _device_t) -> None:
    r"""Set the current device.

    Initializes XPU if that has not happened yet.

    Args:
        device (torch.device or int or str): selected device. This function is a
            no-op if this argument is negative.
    """
    _lazy_init()
    index = _get_device_index(device)
    if index < 0:
        return
    torch._C._xpu_setDevice(index)
def get_device_name(device: Optional[_device_t] = None) -> str:
    r"""Get the name of a device.

    Args:
        device (torch.device or int or str, optional): device for which to
            return the name. It uses the current device, given by
            :func:`~torch.xpu.current_device`, if :attr:`device` is ``None``
            (default).

    Returns:
        str: the ``name`` attribute of the device's properties, as returned by
        :func:`get_device_properties` (which rejects out-of-range indices).
    """
    props = get_device_properties(device)
    return props.name
@lru_cache(None)
def _capability_for_index(index: int) -> Dict[str, Any]:
    r"""Build, once per device index, the capability dictionary of device ``index``."""
    props = get_device_properties(index)
    return {
        prop: getattr(props, prop) for prop in dir(props) if not prop.startswith("__")
    }


def get_device_capability(device: Optional[_device_t] = None) -> Dict[str, Any]:
    r"""Get the xpu capability of a device.

    The capability dictionary is cached per *resolved device index*. The
    cache used to be keyed on the raw argument, so a call with ``None`` (or
    ``"xpu"``) stayed pinned to whichever device was current on the first
    call, even after the current device changed.

    Args:
        device (torch.device or int or str, optional): device for which to
            return the device capability. It uses the current device, given by
            :func:`~torch.xpu.current_device`, if :attr:`device` is ``None``
            (default).

    Returns:
        Dict[str, Any]: the xpu capability dictionary of the device: every
        attribute of the device properties whose name does not start with
        ``"__"``. A new dict is returned on each call, so callers may modify
        it without affecting the cache.

    Raises:
        AssertionError: if the resolved index is not a valid device index
            (raised by :func:`get_device_properties`; never cached).
    """
    _lazy_init()
    index = _get_device_index(device, optional=True)
    # Shallow copy: hand out a private dict instead of the cached one.
    return dict(_capability_for_index(index))


# Keep the cache-management helpers that the previously decorated function exposed.
get_device_capability.cache_clear = _capability_for_index.cache_clear  # type: ignore[attr-defined]
get_device_capability.cache_info = _capability_for_index.cache_info  # type: ignore[attr-defined]
def get_device_properties(device: Optional[_device_t] = None) -> _XpuDeviceProperties:
    r"""Get the properties of a device.

    Initializes XPU if that has not happened yet.

    Args:
        device (torch.device or int or str): device for which to return the
            properties of the device.

    Returns:
        _XpuDeviceProperties: the properties of the device

    Raises:
        AssertionError: if the resolved index is negative or not smaller than
            :func:`device_count`.
    """
    _lazy_init()
    index = _get_device_index(device, optional=True)
    if not 0 <= index < device_count():
        raise AssertionError("Invalid device index")
    # `_get_device_properties` is not defined in this file.
    # NOTE(review): presumably injected into this module by the native backend — confirm.
    return _get_device_properties(index)  # type: ignore[name-defined]  # noqa: F821
def current_device() -> int:
    r"""Return the index of a currently selected device.

    Initializes XPU if that has not happened yet.
    """
    _lazy_init()
    selected = torch._C._xpu_getDevice()
    return selected
def _get_device(device: Union[int, str, torch.device]) -> torch.device:
r"""Return the torch.device type object from the passed in device.
Args:
device (torch.device or int or str): selected device.
"""
if isinstance(device, str):
device = torch.device(device)
elif isinstance(device, int):
device = torch.device("xpu", device)
return device
class StreamContext:
    r"""Context-manager that selects a given stream.

    All XPU kernels queued within its context will be enqueued on a selected
    stream.

    Args:
        stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.

    .. note:: Streams are per-device.
    """

    cur_stream: Optional["torch.xpu.Stream"]

    def __init__(self, stream: Optional["torch.xpu.Stream"]):
        self.stream = stream
        index = _get_device_index(None, True)
        # A missing index is stored as -1, which disables enter/exit below.
        self.idx = -1 if index is None else index

    def __enter__(self):
        target = self.stream
        if target is None or self.idx == -1:
            return
        self.src_prev_stream = torch.xpu.current_stream(None)
        crosses_device = self.src_prev_stream.device != target.device
        if crosses_device:
            # The target stream lives on another device: also remember that
            # device's current stream so it can be restored on exit.
            with device(target.device):
                self.dst_prev_stream = torch.xpu.current_stream(target.device)
        torch.xpu.set_stream(target)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        target = self.stream
        if target is None or self.idx == -1:
            return
        # Restore the destination device's stream first (if it was touched),
        # then the stream of the device we started on.
        crosses_device = self.src_prev_stream.device != target.device
        if crosses_device:
            torch.xpu.set_stream(self.dst_prev_stream)
        torch.xpu.set_stream(self.src_prev_stream)
def stream(stream: Optional["torch.xpu.Stream"]) -> StreamContext:
    r"""Build the :class:`StreamContext` context manager for the given stream.

    Arguments:
        stream (Stream): selected stream. This manager is a no-op if it's ``None``.
    """
    context = StreamContext(stream)
    return context
def _set_stream_by_id(stream_id, device_index, device_type):
    r"""Set the stream identified by a stream id, device index and device type.

    Args:
        stream_id (int): not visible to the user, used to assigned to the specific stream.
        device_index (int): selected device index.
        device_type (int): selected device type.
    """
    stream_fields = {
        "stream_id": stream_id,
        "device_index": device_index,
        "device_type": device_type,
    }
    # Forwarded as keyword arguments to the native setter.
    torch._C._xpu_setStream(**stream_fields)
def set_stream(stream: Stream):
    r"""Set the current stream. This is a wrapper API to set the stream.

    Usage of this function is discouraged in favor of the ``stream``
    context manager.

    Args:
        stream (Stream): selected stream. This function is a no-op
            if this argument is ``None``.
    """
    if stream is not None:
        _lazy_init()
        _set_stream_by_id(
            stream_id=stream.stream_id,
            device_index=stream.device_index,
            device_type=stream.device_type,
        )
def current_stream(device: Optional[_device_t] = None) -> Stream:
    r"""Return the currently selected :class:`Stream` for a given device.

    Initializes XPU if that has not happened yet.

    Args:
        device (torch.device or int, optional): selected device. Returns
            the currently selected :class:`Stream` for the current device, given
            by :func:`~torch.xpu.current_device`, if :attr:`device` is ``None``
            (default).
    """
    _lazy_init()
    index = _get_device_index(device, optional=True)
    raw = torch._C._xpu_getCurrentStream(index)
    # The native call returns a sequence read positionally as
    # (stream_id, device_index, device_type).
    return Stream(stream_id=raw[0], device_index=raw[1], device_type=raw[2])
def synchronize(device: _device_t = None) -> None:
    r"""Wait for all kernels in all streams on a XPU device to complete.

    Initializes XPU if that has not happened yet.

    Args:
        device (torch.device or int, optional): device for which to synchronize.
            It uses the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    """
    _lazy_init()
    index = _get_device_index(device, optional=True)
    return torch._C._xpu_synchronize(index)
def _get_generator(device: torch.device) -> torch._C.Generator:
    r"""Return the XPU Generator object for the given device.

    Args:
        device (torch.device): selected device. When it carries no index, the
            current device is used.
    """
    index = device.index
    generator_index = current_device() if index is None else index
    return torch.xpu.default_generators[generator_index]
def _set_rng_state_offset(
    offset: int, device: Union[int, str, torch.device] = "xpu"
) -> None:
    r"""Set the random number generator state offset of the specified GPU.

    The update runs immediately when XPU is initialized and is queued until
    initialization otherwise.

    Args:
        offset (int): The desired offset
        device (torch.device or int, optional): The device to set the RNG state.
            Default: ``'xpu'`` (i.e., ``torch.device('xpu')``, the current XPU device).
    """
    target = _get_device(device)

    def _apply_offset():
        _get_generator(target).set_offset(offset)

    _lazy_call(_apply_offset)
def _get_rng_state_offset(device: Union[int, str, torch.device] = "xpu") -> int:
    r"""Return the random number generator state offset of the specified GPU.

    Args:
        device (torch.device or int, optional): The device to return the RNG state offset of.
            Default: ``'xpu'`` (i.e., ``torch.device('xpu')``, the current XPU device).

    .. warning::
        This function eagerly initializes XPU.
    """
    _lazy_init()
    generator = _get_generator(_get_device(device))
    return generator.get_offset()
# import here to avoid circular import
from .memory import (
empty_cache,
max_memory_allocated,
max_memory_reserved,
memory_allocated,
memory_reserved,
memory_stats,
memory_stats_as_nested_dict,
reset_accumulated_memory_stats,
reset_peak_memory_stats,
)
from .random import (
get_rng_state,
get_rng_state_all,
initial_seed,
manual_seed,
manual_seed_all,
seed,
seed_all,
set_rng_state,
set_rng_state_all,
)
# Public API of the package, in alphabetical order.
# NOTE(review): "get_stream" is listed, but no `get_stream` definition or import
# is visible in this file — confirm it is provided elsewhere, otherwise
# `from torch.xpu import *` would fail on it.
__all__ = [
    "Event",
    "Stream",
    "StreamContext",
    "current_device",
    "current_stream",
    "default_generators",
    "device",
    "device_of",
    "device_count",
    "empty_cache",
    "get_device_capability",
    "get_device_name",
    "get_device_properties",
    "get_rng_state",
    "get_rng_state_all",
    "get_stream",
    "init",
    "initial_seed",
    "is_available",
    "is_bf16_supported",
    "is_initialized",
    "manual_seed",
    "manual_seed_all",
    "max_memory_allocated",
    "max_memory_reserved",
    "memory_allocated",
    "memory_reserved",
    "memory_stats",
    "memory_stats_as_nested_dict",
    "reset_accumulated_memory_stats",
    "reset_peak_memory_stats",
    "seed",
    "seed_all",
    "set_device",
    "set_rng_state",
    "set_rng_state_all",
    "set_stream",
    "stream",
    "streams",
    "synchronize",
]

View File

@ -0,0 +1,75 @@
from typing import Callable
from torch._utils import CallbackRegistry
# Module-level callback registries, one per kind of traced XPU runtime event.
# The string passed to CallbackRegistry is the registry's name; the quoted
# annotation records the argument types of the callbacks held by the registry.
EventCreationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
    "XPU event creation"
)
EventDeletionCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
    "XPU event deletion"
)
EventRecordCallbacks: "CallbackRegistry[int, int]" = CallbackRegistry(
    "XPU event record"
)
EventWaitCallbacks: "CallbackRegistry[int, int]" = CallbackRegistry(
    "XPU event wait"
)
MemoryAllocationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
    "XPU memory allocation"
)
MemoryDeallocationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
    "XPU memory deallocation"
)
StreamCreationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
    "XPU stream creation"
)
# The only registry whose callbacks take no arguments.
DeviceSynchronizationCallbacks: "CallbackRegistry[[]]" = CallbackRegistry(
    "XPU device synchronization"
)
StreamSynchronizationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
    "XPU stream synchronization"
)
EventSynchronizationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
    "XPU event synchronization"
)
def register_callback_for_event_creation(cb: Callable[[int], None]) -> None:
    r"""Add ``cb`` to the ``EventCreationCallbacks`` ("XPU event creation") registry.

    Args:
        cb: callable taking one ``int`` argument.
    """
    registry = EventCreationCallbacks
    registry.add_callback(cb)
def register_callback_for_event_deletion(cb: Callable[[int], None]) -> None:
    r"""Add ``cb`` to the ``EventDeletionCallbacks`` ("XPU event deletion") registry.

    Args:
        cb: callable taking one ``int`` argument.
    """
    registry = EventDeletionCallbacks
    registry.add_callback(cb)
def register_callback_for_event_record(cb: Callable[[int, int], None]) -> None:
    r"""Add ``cb`` to the ``EventRecordCallbacks`` ("XPU event record") registry.

    Args:
        cb: callable taking two ``int`` arguments.
    """
    registry = EventRecordCallbacks
    registry.add_callback(cb)
def register_callback_for_event_wait(cb: Callable[[int, int], None]) -> None:
    r"""Add ``cb`` to the ``EventWaitCallbacks`` ("XPU event wait") registry.

    Args:
        cb: callable taking two ``int`` arguments.
    """
    registry = EventWaitCallbacks
    registry.add_callback(cb)
def register_callback_for_memory_allocation(cb: Callable[[int], None]) -> None:
    r"""Add ``cb`` to the ``MemoryAllocationCallbacks`` ("XPU memory allocation") registry.

    Args:
        cb: callable taking one ``int`` argument.
    """
    registry = MemoryAllocationCallbacks
    registry.add_callback(cb)
def register_callback_for_memory_deallocation(cb: Callable[[int], None]) -> None:
    r"""Add ``cb`` to the ``MemoryDeallocationCallbacks`` ("XPU memory deallocation") registry.

    Args:
        cb: callable taking one ``int`` argument.
    """
    registry = MemoryDeallocationCallbacks
    registry.add_callback(cb)
def register_callback_for_stream_creation(cb: Callable[[int], None]) -> None:
    r"""Add ``cb`` to the ``StreamCreationCallbacks`` ("XPU stream creation") registry.

    Args:
        cb: callable taking one ``int`` argument.
    """
    registry = StreamCreationCallbacks
    registry.add_callback(cb)
def register_callback_for_device_synchronization(cb: Callable[[], None]) -> None:
    r"""Add ``cb`` to the ``DeviceSynchronizationCallbacks`` ("XPU device synchronization") registry.

    Args:
        cb: callable taking no arguments.
    """
    registry = DeviceSynchronizationCallbacks
    registry.add_callback(cb)
def register_callback_for_stream_synchronization(cb: Callable[[int], None]) -> None:
    r"""Add ``cb`` to the ``StreamSynchronizationCallbacks`` ("XPU stream synchronization") registry.

    Args:
        cb: callable taking one ``int`` argument.
    """
    registry = StreamSynchronizationCallbacks
    registry.add_callback(cb)
def register_callback_for_event_synchronization(cb: Callable[[int], None]) -> None:
    r"""Add ``cb`` to the ``EventSynchronizationCallbacks`` ("XPU event synchronization") registry.

    Args:
        cb: callable taking one ``int`` argument.
    """
    registry = EventSynchronizationCallbacks
    registry.add_callback(cb)

View File

@ -0,0 +1,39 @@
from typing import Any
import torch
# The generic _get_device_index lives in torch._utils; it is imported under an
# alias because this module defines its own XPU-specific _get_device_index.
from torch._utils import _get_device_index as _torch_get_device_index
def _get_device_index(
device: Any, optional: bool = False, allow_cpu: bool = False
) -> int:
r"""Get the device index from :attr:`device`, which can be a torch.device
object, a Python integer, or ``None``.
If :attr:`device` is a torch.device object, returns the device index if it
is a XPU device. Note that for a XPU device without a specified index,
i.e., ``torch.device('xpu')``, this will return the current default XPU
device if :attr:`optional` is ``True``. If :attr:`allow_cpu` is ``True``,
CPU devices will be accepted and ``-1`` will be returned in this case.
If :attr:`device` is a Python integer, it is returned as is.
If :attr:`device` is ``None``, this will return the current default XPU
device if :attr:`optional` is ``True``.
"""
if isinstance(device, int):
return device
if isinstance(device, str):
device = torch.device(device)
if isinstance(device, torch.device):
if allow_cpu:
if device.type not in ["xpu", "cpu"]:
raise ValueError(f"Expected a xpu or cpu device, but got: {device}")
elif device.type != "xpu":
raise ValueError(f"Expected a xpu device, but got: {device}")
if not torch.jit.is_scripting():
if isinstance(device, torch.xpu.device):
return device.idx
return _torch_get_device_index(device, optional, allow_cpu)

View File

@ -0,0 +1,191 @@
import collections
from typing import Any, Dict, Union
import torch
from torch.types import Device
from . import _get_device_index, is_initialized
# Accepted spellings of a device argument for the functions in this module.
_device_t = Union[Device, str, int, None]
def empty_cache() -> None:
    r"""Release all unoccupied cached memory currently held by the caching
    allocator so that it can be used by other XPU applications.

    Does nothing when XPU has not been initialized.

    .. note::
        :func:`~torch.xpu.empty_cache` doesn't increase the amount of XPU
        memory available for PyTorch. However, it may help reduce fragmentation
        of XPU memory in certain cases.
    """
    if not is_initialized():
        return
    torch._C._xpu_emptyCache()
def reset_peak_memory_stats(device: _device_t = None) -> None:
    r"""Reset the "peak" stats tracked by the XPU memory allocator.

    See :func:`~torch.xpu.memory_stats` for details. Peak stats correspond to the
    `"peak"` key in each individual stat dict.

    Args:
        device (torch.device or int or str, optional): selected device. The
            stats of the current device, given by
            :func:`~torch.xpu.current_device`, are reset if :attr:`device` is
            ``None`` (default).
    """
    index = _get_device_index(device, optional=True)
    return torch._C._xpu_resetPeakMemoryStats(index)
def reset_accumulated_memory_stats(device: _device_t = None) -> None:
    r"""Reset the "accumulated" (historical) stats tracked by the XPU memory allocator.

    See :func:`~torch.xpu.memory_stats` for details. Accumulated stats correspond to
    the `"allocated"` and `"freed"` keys in each individual stat dict.

    Args:
        device (torch.device or int or str, optional): selected device. The
            stats of the current device, given by
            :func:`~torch.xpu.current_device`, are reset if :attr:`device` is
            ``None`` (default).
    """
    index = _get_device_index(device, optional=True)
    return torch._C._xpu_resetAccumulatedMemoryStats(index)
def memory_stats_as_nested_dict(device: _device_t = None) -> Dict[str, Any]:
    r"""Return the XPU memory allocator statistics as a nested dictionary.

    This is the un-flattened form of :func:`~torch.xpu.memory_stats`. An
    empty dict is returned when XPU has not been initialized.
    """
    if is_initialized():
        index = _get_device_index(device, optional=True)
        return torch._C._xpu_memoryStats(index)
    return {}
def memory_stats(device: _device_t = None) -> Dict[str, Any]:
    r"""Return a dictionary of XPU memory allocator statistics for a given device.

    The nested statistics of :func:`~torch.xpu.memory_stats_as_nested_dict`
    are flattened into dotted keys and returned sorted by key. Each value is
    a non-negative integer.

    Core statistics:

    - ``"allocated_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of allocated memory.
    - ``"reserved_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of reserved memory.
    - ``"active_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of active memory.
    - ``"requested_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      memory requested by client code, compare this with allocated_bytes to check if
      allocation rounding adds too much overhead.

    For these core statistics, values are broken down as follows.

    Pool type:

    - ``all``: combined statistics across all memory pools.
    - ``large_pool``: statistics for the large allocation pool (for size >= 1MB allocations).
    - ``small_pool``: statistics for the small allocation pool (for size < 1MB allocations).

    Metric type:

    - ``current``: current value of this metric.
    - ``peak``: maximum value of this metric.
    - ``allocated``: historical total increase in this metric.
    - ``freed``: historical total decrease in this metric.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistics for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    """

    def _flatten(prefix: str, node: Any):
        # Leaves yield one (dotted_key, value) pair; dicts recurse into their items.
        if not isinstance(node, dict):
            yield prefix, node
            return
        base = prefix + "." if prefix else prefix
        for key, value in node.items():
            yield from _flatten(base + key, value)

    nested = memory_stats_as_nested_dict(device=device)
    flat = sorted(_flatten("", nested))
    return collections.OrderedDict(flat)
def memory_allocated(device: _device_t = None) -> int:
    r"""Return the current GPU memory occupied by tensors in bytes for a given device.

    This is the ``"allocated_bytes.all.current"`` entry of
    :func:`~torch.xpu.memory_stats`, or ``0`` when that entry is absent.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        This is likely less than the amount shown in `xpu-smi` since some
        unused memory can be held by the caching allocator and some context
        needs to be created on GPU.
    """
    stats = memory_stats(device=device)
    return stats.get("allocated_bytes.all.current", 0)
def max_memory_allocated(device: _device_t = None) -> int:
    r"""Return the maximum GPU memory occupied by tensors in bytes for a given device.

    This is the ``"allocated_bytes.all.peak"`` entry of
    :func:`~torch.xpu.memory_stats`, or ``0`` when that entry is absent.

    By default the peak covers the whole run of the program.
    :func:`~torch.xpu.reset_peak_memory_stats` restarts the tracking, so the
    two functions together can measure, for example, the peak allocated
    memory of each iteration of a training loop.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    """
    stats = memory_stats(device=device)
    return stats.get("allocated_bytes.all.peak", 0)
def memory_reserved(device: _device_t = None) -> int:
    r"""Return the current GPU memory managed by the caching allocator in bytes for a given device.

    This is the ``"reserved_bytes.all.current"`` entry of
    :func:`~torch.xpu.memory_stats`, or ``0`` when that entry is absent.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    """
    stats = memory_stats(device=device)
    return stats.get("reserved_bytes.all.current", 0)
def max_memory_reserved(device: _device_t = None) -> int:
    r"""Return the maximum GPU memory managed by the caching allocator in bytes for a given device.

    This is the ``"reserved_bytes.all.peak"`` entry of
    :func:`~torch.xpu.memory_stats`, or ``0`` when that entry is absent.

    By default the peak covers the whole run of the program.
    :func:`~torch.xpu.reset_peak_memory_stats` restarts the tracking, so the
    two functions together can measure, for example, the peak cached memory
    of each iteration of a training loop.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    """
    stats = memory_stats(device=device)
    return stats.get("reserved_bytes.all.peak", 0)
# Public names of this module: every function defined above.
__all__ = [
    "empty_cache",
    "max_memory_allocated",
    "max_memory_reserved",
    "memory_allocated",
    "memory_reserved",
    "memory_stats",
    "memory_stats_as_nested_dict",
    "reset_accumulated_memory_stats",
    "reset_peak_memory_stats",
]

View File

@ -0,0 +1,178 @@
# mypy: allow-untyped-defs
from typing import Iterable, List, Union
import torch
from torch import Tensor
from . import _lazy_call, _lazy_init, current_device, device_count
def get_rng_state(device: Union[int, str, torch.device] = "xpu") -> Tensor:
    r"""Return the random number generator state of the specified GPU as a ByteTensor.

    Args:
        device (torch.device or int, optional): The device to return the RNG state of.
            Default: ``'xpu'`` (i.e., ``torch.device('xpu')``, the current XPU device).

    .. warning::
        This function eagerly initializes XPU.
    """
    _lazy_init()
    if isinstance(device, int):
        target = torch.device("xpu", device)
    elif isinstance(device, str):
        target = torch.device(device)
    else:
        target = device

    index = target.index
    if index is None:
        # A device without an explicit index means the current device.
        index = current_device()
    return torch.xpu.default_generators[index].get_state()
def get_rng_state_all() -> List[Tensor]:
    r"""Return a list of ByteTensor representing the random number states of all devices.

    The list holds one entry per device index, in index order.
    """
    return [get_rng_state(index) for index in range(device_count())]
def set_rng_state(
    new_state: Tensor, device: Union[int, str, torch.device] = "xpu"
) -> None:
    r"""Set the random number generator state of the specified GPU.

    The state is copied immediately; it is applied right away when XPU is
    initialized and queued until initialization otherwise.

    Args:
        new_state (torch.ByteTensor): The desired state
        device (torch.device or int, optional): The device to set the RNG state.
            Default: ``'xpu'`` (i.e., ``torch.device('xpu')``, the current XPU device).
    """
    # Snapshot the state now so later changes to ``new_state`` are not picked up.
    with torch._C._DisableFuncTorch():
        snapshot = new_state.clone(memory_format=torch.contiguous_format)

    if isinstance(device, int):
        target = torch.device("xpu", device)
    elif isinstance(device, str):
        target = torch.device(device)
    else:
        target = device

    def _restore_state():
        index = target.index
        if index is None:
            index = current_device()
        torch.xpu.default_generators[index].set_state(snapshot)

    _lazy_call(_restore_state)
def set_rng_state_all(new_states: Iterable[Tensor]) -> None:
    r"""Set the random number generator state of all devices.

    The state at position ``i`` of the iterable is applied to device ``i``.

    Args:
        new_states (Iterable of torch.ByteTensor): The desired state for each device.
    """
    device_index = 0
    for device_state in new_states:
        set_rng_state(device_state, device_index)
        device_index += 1
def manual_seed(seed: int) -> None:
    r"""Set the seed for generating random numbers for the current GPU.

    It's safe to call this function if XPU is not available; in that case, it is silently ignored.

    Args:
        seed (int): The desired seed.

    .. warning::
        If you are working with a multi-GPU model, this function is insufficient
        to get determinism. To seed all GPUs, use :func:`manual_seed_all`.
    """
    seed_value = int(seed)

    def _seed_current_device():
        generator = torch.xpu.default_generators[current_device()]
        generator.manual_seed(seed_value)

    # Tagged as a seeding call so it is tracked by the lazy seed tracker.
    _lazy_call(_seed_current_device, seed=True)
def manual_seed_all(seed: int) -> None:
    r"""Set the seed for generating random numbers on all GPUs.

    It's safe to call this function if XPU is not available; in that case, it is silently ignored.

    Args:
        seed (int): The desired seed.
    """
    seed_value = int(seed)

    def _seed_every_device():
        for index in range(device_count()):
            torch.xpu.default_generators[index].manual_seed(seed_value)

    # Tagged as a seed-all call so it is tracked by the lazy seed tracker.
    _lazy_call(_seed_every_device, seed_all=True)
def seed() -> None:
    r"""Set the seed for generating random numbers to a random number for the current GPU.

    It's safe to call this function if XPU is not available; in that case, it is silently ignored.

    .. warning::
        If you are working with a multi-GPU model, this function will only initialize
        the seed on one GPU. To initialize all GPUs, use :func:`seed_all`.
    """

    def _reseed_current_device():
        generator = torch.xpu.default_generators[current_device()]
        generator.seed()

    _lazy_call(_reseed_current_device)
def seed_all() -> None:
    r"""Set the seed for generating random numbers to a random number on all GPUs.

    The first device draws a fresh random seed and every other device is
    seeded with that same value.

    It's safe to call this function if XPU is not available; in that case, it is silently ignored.
    """

    def _reseed_every_device():
        shared_seed = 0
        for index in range(device_count()):
            generator = torch.xpu.default_generators[index]
            if index == 0:
                # Device 0 picks the seed that all remaining devices reuse.
                generator.seed()
                shared_seed = generator.initial_seed()
            else:
                generator.manual_seed(shared_seed)

    _lazy_call(_reseed_every_device)
def initial_seed() -> int:
    r"""Return the current random seed of the current GPU.

    .. warning::
        This function eagerly initializes XPU.
    """
    _lazy_init()
    generator = torch.xpu.default_generators[current_device()]
    return generator.initial_seed()
# Public names of this module: every function defined above.
__all__ = [
    "get_rng_state",
    "get_rng_state_all",
    "set_rng_state",
    "set_rng_state_all",
    "manual_seed",
    "manual_seed_all",
    "seed",
    "seed_all",
    "initial_seed",
]

View File

@ -0,0 +1,171 @@
# mypy: allow-untyped-defs
import ctypes
import torch
from torch._streambase import _EventBase, _StreamBase
from .._utils import _dummy_type
# When torch._C lacks `_XpuStreamBase`, install placeholder base classes
# directly into torch._C's namespace so the class statements below still work.
# Only `_XpuStreamBase` is probed; both placeholders are installed together.
if not hasattr(torch._C, "_XpuStreamBase"):
    # Define dummy base classes
    torch._C.__dict__["_XpuStreamBase"] = _dummy_type("_XpuStreamBase")
    torch._C.__dict__["_XpuEventBase"] = _dummy_type("_XpuEventBase")
class Stream(torch._C._XpuStreamBase, _StreamBase):
    r"""Wrapper around a XPU stream.

    A XPU stream is a linear sequence of execution that belongs to a specific
    device, independent from other streams.

    Args:
        device(torch.device or int, optional): a device on which to allocate
            the stream. If :attr:`device` is ``None`` (default) or a negative
            integer, this will use the current device.
        priority(int, optional): priority of the stream, should be 0 or
            negative, where negative numbers indicate higher priority. By default,
            streams have priority 0.
    """

    def __new__(cls, device=None, priority=0, **kwargs):
        # Switching the device is expensive, so it is skipped when no device
        # was requested or when the stream is fully described by its ids.
        described_by_ids = "stream_id" in kwargs and "device_index" in kwargs
        if device is None or described_by_ids:
            return super().__new__(cls, priority=priority, **kwargs)
        with torch.xpu.device(device):
            return super().__new__(cls, priority=priority, **kwargs)

    def wait_event(self, event) -> None:
        r"""Make all future work submitted to the stream wait for an event.

        Args:
            event (torch.xpu.Event): an event to wait for.
        """
        event.wait(self)

    def wait_stream(self, stream) -> None:
        r"""Synchronize with another stream.

        All future work submitted to this stream will wait until all kernels
        submitted to a given stream at the time of call complete.

        Args:
            stream (Stream): a stream to synchronize.
        """
        marker = stream.record_event()
        self.wait_event(marker)

    def record_event(self, event=None):
        r"""Record an event.

        Args:
            event (torch.xpu.Event, optional): event to record. If not given, a new one
                will be allocated.

        Returns:
            Recorded event.
        """
        recorded = Event() if event is None else event
        recorded.record(self)
        return recorded

    def query(self) -> bool:
        r"""Check if all the work submitted has been completed.

        Returns:
            A boolean indicating if all kernels in this stream are completed.
        """
        return super().query()

    def synchronize(self) -> None:
        r"""Wait for all the kernels in this stream to complete."""
        super().synchronize()

    @property
    def _as_parameter_(self):
        # Wraps the ``sycl_queue`` attribute in a ctypes void pointer.
        return ctypes.c_void_p(self.sycl_queue)

    def __eq__(self, o):
        # Anything that is not a Stream compares unequal.
        if not isinstance(o, Stream):
            return False
        return super().__eq__(o)

    def __hash__(self):
        identity = (self.sycl_queue, self.device)
        return hash(identity)

    def __repr__(self):
        return f"torch.xpu.Stream(device={self.device} sycl_queue={self.sycl_queue:#x})"
class Event(torch._C._XpuEventBase, _EventBase):
    r"""Wrapper around a XPU event.

    XPU events are synchronization markers that can be used to monitor the
    device's progress, and to synchronize XPU streams.

    The underlying XPU events are lazily initialized when the event is first
    recorded. After creation, only streams on the same device may record the
    event. However, streams on any device can wait on the event.

    Args:
        enable_timing (bool, optional): indicates if the event should measure time
            (default: ``False``)
    """

    def __new__(cls, enable_timing=False):
        return super().__new__(cls, enable_timing=enable_timing)

    def record(self, stream=None) -> None:
        r"""Record the event in a given stream.

        Uses ``torch.xpu.current_stream()`` if no stream is specified. The
        stream's device must match the event's device.
        """
        target = torch.xpu.current_stream() if stream is None else stream
        super().record(target)

    def wait(self, stream=None) -> None:
        r"""Make all future work submitted to the given stream wait for this event.

        Use ``torch.xpu.current_stream()`` if no stream is specified.
        """
        target = torch.xpu.current_stream() if stream is None else stream
        super().wait(target)

    def query(self) -> bool:
        r"""Check if all work currently captured by event has completed.

        Returns:
            A boolean indicating if all work currently captured by event has
            completed.
        """
        return super().query()

    def elapsed_time(self, end_event):
        r"""Return the time elapsed.

        Time reported in milliseconds after the event was recorded and
        before the end_event was recorded.
        """
        return super().elapsed_time(end_event)

    def synchronize(self) -> None:
        r"""Wait for the event to complete.

        Waits until the completion of all work currently captured in this event.
        This prevents the CPU thread from proceeding until the event completes.
        """
        super().synchronize()

    @property
    def _as_parameter_(self):
        # Wraps the ``sycl_event`` attribute in a ctypes void pointer.
        return ctypes.c_void_p(self.sycl_event)

    def __repr__(self):
        # A falsy ``sycl_event`` is reported as an uninitialized event.
        if not self.sycl_event:
            return "torch.xpu.Event(uninitialized)"
        return f"torch.xpu.Event(sycl_event={self.sycl_event:#x})"