I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,20 @@
from ctypes import c_void_p
from torch import Tensor
# Defined in torch/csrc/inductor/aoti_runner/pybind.cpp
# Tensor to AtenTensorHandle
def unsafe_alloc_void_ptrs_from_tensors(tensors: list[Tensor]) -> list[c_void_p]: ...
def unsafe_alloc_void_ptr_from_tensor(tensor: Tensor) -> c_void_p: ...
# AtenTensorHandle to Tensor
def alloc_tensors_by_stealing_from_void_ptrs(
handles: list[c_void_p],
) -> list[Tensor]: ...
def alloc_tensor_by_stealing_from_void_ptr(
handle: c_void_p,
) -> Tensor: ...
class AOTIModelContainerRunnerCpu: ...
class AOTIModelContainerRunnerCuda: ...
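
A hedged sketch of round-tripping tensors through AtenTensorHandle void pointers, assuming these bindings are importable as torch._C._aoti (an internal API that may change without notice):

import torch
from torch._C import _aoti  # assumption: internal module name

tensors = [torch.randn(2, 3), torch.ones(4)]
handles = _aoti.unsafe_alloc_void_ptrs_from_tensors(tensors)        # Tensor -> AtenTensorHandle
restored = _aoti.alloc_tensors_by_stealing_from_void_ptrs(handles)  # handles are consumed ("stolen")
print([t.shape for t in restored])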


@@ -0,0 +1,135 @@
# mypy: allow-untyped-defs
from enum import Enum
from typing import Any, Callable
import torch
from torch._C._profiler import (
_ProfilerEvent,
ActiveProfilerType,
ProfilerActivity,
ProfilerConfig,
)
# Defined in torch/csrc/autograd/init.cpp
class DeviceType(Enum):
CPU = ...
CUDA = ...
XPU = ...
MKLDNN = ...
OPENGL = ...
OPENCL = ...
IDEEP = ...
HIP = ...
FPGA = ...
MAIA = ...
XLA = ...
MTIA = ...
MPS = ...
HPU = ...
Meta = ...
Vulkan = ...
Metal = ...
PrivateUse1 = ...
class ProfilerEvent:
def cpu_elapsed_us(self, other: ProfilerEvent) -> float: ...
def cpu_memory_usage(self) -> int: ...
def cuda_elapsed_us(self, other: ProfilerEvent) -> float: ...
def privateuse1_elapsed_us(self, other: ProfilerEvent) -> float: ...
def cuda_memory_usage(self) -> int: ...
def device(self) -> int: ...
def handle(self) -> int: ...
def has_cuda(self) -> bool: ...
def is_remote(self) -> bool: ...
def kind(self) -> int: ...
def name(self) -> str: ...
def node_id(self) -> int: ...
def sequence_nr(self) -> int: ...
def shapes(self) -> list[list[int]]: ...
def thread_id(self) -> int: ...
def flops(self) -> float: ...
def is_async(self) -> bool: ...
class _KinetoEvent:
def name(self) -> str: ...
def device_index(self) -> int: ...
def device_resource_id(self) -> int: ...
def start_ns(self) -> int: ...
def end_ns(self) -> int: ...
def duration_ns(self) -> int: ...
def is_async(self) -> bool: ...
def linked_correlation_id(self) -> int: ...
def shapes(self) -> list[list[int]]: ...
def dtypes(self) -> list[str]: ...
def concrete_inputs(self) -> list[Any]: ...
def kwinputs(self) -> dict[str, Any]: ...
def device_type(self) -> DeviceType: ...
def start_thread_id(self) -> int: ...
def end_thread_id(self) -> int: ...
def correlation_id(self) -> int: ...
def fwd_thread_id(self) -> int: ...
def stack(self) -> list[str]: ...
def scope(self) -> int: ...
def sequence_nr(self) -> int: ...
def flops(self) -> int: ...
def cuda_elapsed_us(self) -> int: ...
def privateuse1_elapsed_us(self) -> int: ...
def is_user_annotation(self) -> bool: ...
class _ProfilerResult:
def events(self) -> list[_KinetoEvent]: ...
def legacy_events(self) -> list[list[ProfilerEvent]]: ...
def save(self, path: str) -> None: ...
def experimental_event_tree(self) -> list[_ProfilerEvent]: ...
def trace_start_ns(self) -> int: ...
class SavedTensor: ...
def _enable_profiler(
config: ProfilerConfig,
activities: set[ProfilerActivity],
) -> None: ...
def _prepare_profiler(
config: ProfilerConfig,
activities: set[ProfilerActivity],
) -> None: ...
def _toggle_collection_dynamic(
enable: bool,
activities: set[ProfilerActivity],
) -> None: ...
def _disable_profiler() -> _ProfilerResult: ...
def _profiler_enabled() -> bool: ...
def _add_metadata_json(key: str, value: str) -> None: ...
def _kineto_step() -> None: ...
def _get_current_graph_task_keep_graph() -> bool: ...
def _get_sequence_nr() -> int: ...
def kineto_available() -> bool: ...
def _record_function_with_args_enter(name: str, *args) -> torch.Tensor: ...
def _record_function_with_args_exit(handle: torch.Tensor) -> None: ...
def _supported_activities() -> set[ProfilerActivity]: ...
def _enable_record_function(enable: bool) -> None: ...
def _set_empty_test_observer(is_global: bool, sampling_prob: float) -> None: ...
def _push_saved_tensors_default_hooks(
pack_hook: Callable[[torch.Tensor], Any],
unpack_hook: Callable[[Any], torch.Tensor],
) -> None: ...
def _pop_saved_tensors_default_hooks() -> None: ...
def _unsafe_set_version_counter(t: torch.Tensor, prev_version: int) -> None: ...
def _enable_profiler_legacy(config: ProfilerConfig) -> None: ...
def _disable_profiler_legacy() -> list[list[ProfilerEvent]]: ...
def _profiler_type() -> ActiveProfilerType: ...
def _saved_tensors_hooks_enable() -> None: ...
def _saved_tensors_hooks_disable(message: str) -> None: ...
def _saved_tensors_hooks_get_disabled_error_message() -> str | None: ...
def _saved_tensors_hooks_set_tracing(is_tracing: bool) -> bool: ...
class CreationMeta(Enum):
DEFAULT = ...
IN_CUSTOM_FUNCTION = ...
MULTI_OUTPUT_NODE = ...
NO_GRAD_MODE = ...
INFERENCE_MODE = ...
def _set_creation_meta(t: torch.Tensor, creation_meta: CreationMeta) -> None: ...
def _get_creation_meta(t: torch.Tensor) -> CreationMeta: ...
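
These bindings are normally reached through the public torch.profiler wrapper rather than called directly; for example:

import torch
from torch.profiler import profile, ProfilerActivity

with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
    torch.mm(torch.randn(64, 64), torch.randn(64, 64))

# The table is built from the _KinetoEvent records declared above.
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=5))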


@@ -0,0 +1,12 @@
from torch.types import _bool, _int
# Defined in torch/csrc/cpu/Module.cpp
def _is_avx2_supported() -> _bool: ...
def _is_avx512_supported() -> _bool: ...
def _is_avx512_vnni_supported() -> _bool: ...
def _is_avx512_bf16_supported() -> _bool: ...
def _is_amx_tile_supported() -> _bool: ...
def _init_amx() -> _bool: ...
def _L1d_cache_size() -> _int: ...
def _L2_cache_size() -> _int: ...
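
An illustrative ISA query through these bindings, assuming they are exposed as torch._C._cpu (internal, subject to change):

import torch
from torch._C import _cpu  # assumption: internal module name

print("AVX2 supported:", _cpu._is_avx2_supported())
print("AVX-512 supported:", _cpu._is_avx512_supported())
print("L2 cache size (bytes):", _cpu._L2_cache_size())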


@@ -0,0 +1,17 @@
from enum import Enum
from torch.types import _bool, Tuple
# Defined in torch/csrc/cuda/shared/cudnn.cpp
is_cuda: _bool
def getRuntimeVersion() -> Tuple[int, int, int]: ...
def getCompileVersion() -> Tuple[int, int, int]: ...
def getVersionInt() -> int: ...
class RNNMode(int, Enum):
value: int
rnn_relu = ...
rnn_tanh = ...
lstm = ...
gru = ...
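
In practice these are consumed through torch.backends.cudnn, e.g.:

import torch

if torch.backends.cudnn.is_available():
    print("cuDNN version:", torch.backends.cudnn.version())  # single int, cf. getVersionInt()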


@@ -0,0 +1 @@
def getVersionInt() -> int: ...


@@ -0,0 +1,27 @@
# mypy: allow-untyped-defs
from typing import Any
import torch
# This module is defined in torch/csrc/distributed/autograd/init.cpp
class DistAutogradContext:
def _context_id(self) -> int: ...
def _recv_functions(self) -> dict[int, Any]: ...
def _send_functions(self) -> dict[int, Any]: ...
def _known_worker_ids(self) -> set[int]: ...
def _new_context() -> DistAutogradContext: ...
def _release_context(context_id: int) -> None: ...
def _get_max_id() -> int: ...
def _is_valid_context(worker_id: int) -> bool: ...
def _retrieve_context(context_id: int) -> DistAutogradContext: ...
def _current_context() -> DistAutogradContext: ...
def _init(worker_id: int) -> None: ...
def _get_debug_info() -> dict[str, str]: ...
def backward(
context_id: int,
roots: list[torch.Tensor],
retain_graph=False,
) -> None: ...
def get_gradients(context_id: int) -> dict[torch.Tensor, torch.Tensor]: ...
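
The public entry point for these bindings is torch.distributed.autograd; a minimal sketch (model and inputs are hypothetical placeholders, and an RPC agent must already be initialized):

import torch.distributed.autograd as dist_autograd

with dist_autograd.context() as context_id:          # creates and releases a DistAutogradContext
    loss = model(inputs).sum()                        # hypothetical model/inputs
    dist_autograd.backward(context_id, [loss])
    grads = dist_autograd.get_gradients(context_id)   # dict: Tensor -> gradient Tensor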


@@ -0,0 +1,699 @@
# mypy: allow-untyped-defs
# mypy: disable-error-code="type-arg"
from datetime import timedelta
from enum import Enum
from typing import Any, Optional, overload
import torch
from torch import Tensor
from torch._C import ScriptObject
from torch.futures import Future
# This module is defined in torch/csrc/distributed/c10d/init.cpp
_DEFAULT_FIRST_BUCKET_BYTES: int
_DEFAULT_NO_TIMEOUT: timedelta
_DEFAULT_PG_TIMEOUT: timedelta
_DEFAULT_PG_NCCL_TIMEOUT: timedelta
class BuiltinCommHookType(Enum):
ALLREDUCE = ...
FP16_COMPRESS = ...
def _register_comm_hook(reducer: Reducer, state: Any, comm_hook: Any): ...
def _register_builtin_comm_hook(
reducer: Reducer,
comm_hook_type: BuiltinCommHookType,
): ...
def _set_global_rank(rank: int) -> None: ...
def _hash_tensors(tensors: list[Tensor]) -> int: ...
class GradBucket:
def index(self) -> int: ...
def buffer(self) -> Tensor: ...
def gradients(self) -> list[Tensor]: ...
def is_last(self) -> bool: ...
def set_buffer(self, tensor: Tensor) -> None: ...
def parameters(self) -> list[Tensor]: ...
class Reducer:
def __init__(
self,
params: list[Tensor],
bucket_indices: list[list[int]],
per_bucket_size_limits: list[int],
process_group: ProcessGroup,
expect_sparse_gradients: list[bool] = ...,
bucket_bytes_cap: int = ..., # kDefaultBucketBytesCap in reducer.hpp
find_unused_parameters: bool = ...,
gradient_as_bucket_view: bool = ...,
param_to_name_mapping: dict[int, str] = ...,
first_bucket_bytes_cap: int = ..., # kDefaultFirstBucketBytes in reducer.hpp
) -> None: ...
def prepare_for_forward(self) -> None: ...
def prepare_for_backward(self, output: list[Tensor]) -> None: ...
def get_backward_stats(self) -> list[int]: ...
def _install_post_backward_futures(self, futures: list[Future]) -> None: ...
def _rebuild_buckets(self) -> bool: ...
def _get_zeros_like_grad_buckets(self) -> list[GradBucket]: ...
def _push_all_rebuilt_params(self) -> None: ...
def _set_forward_pass_work_handle(
self,
work: Work,
use_static_world_size: bool,
): ...
def _get_local_used_map(self) -> Tensor: ...
def _set_ddp_runtime_logging_sample_rate(self, sample_rate: int) -> None: ...
def _set_static_graph(self) -> None: ...
def _run_comm_hook(self, bucket: GradBucket) -> Future: ...
def set_logger(self, logger: Logger) -> None: ...
def _remove_autograd_hooks(self) -> None: ...
def _check_reducer_finalized(self) -> None: ...
def _set_sparse_metadata(self, global_unique_ids: dict[str, Tensor]) -> None: ...
def _reset_state(self) -> None: ...
def _update_process_group(self, new_process_group: ProcessGroup) -> None: ...
class DDPLoggingData:
strs_map: dict[str, str]
ints_map: dict[str, int]
class Logger:
def __init__(self, reducer: Reducer) -> None: ...
def set_construction_data_and_log(
self,
module_name: str,
device_ids: list[int],
output_device: int,
broadcast_buffers: bool,
has_sync_bn: bool,
static_graph: bool,
): ...
def set_runtime_stats_and_log(self) -> None: ...
def set_error_and_log(self, error: str) -> None: ...
def _get_ddp_logging_data(self) -> DDPLoggingData: ...
def _set_comm_hook_name(self, comm_hook: str) -> None: ...
def _set_uneven_input_join(self) -> None: ...
def _set_static_graph(self) -> None: ...
class _WorkerServer:
def __init__(self, socket_path: str) -> None: ...
def shutdown(self) -> None: ...
def get_debug_level(): ...
def set_debug_level(): ...
def set_debug_level_from_env(): ...
class DebugLevel(Enum):
OFF = ...
INFO = ...
DETAIL = ...
class ReduceOp:
def __init__(self, op: RedOpType) -> None: ...
SUM: RedOpType = ...
AVG: RedOpType = ...
PRODUCT: RedOpType = ...
MIN: RedOpType = ...
MAX: RedOpType = ...
BAND: RedOpType = ...
BOR: RedOpType = ...
BXOR: RedOpType = ...
PREMUL_SUM: RedOpType = ...
UNUSED: RedOpType = ...
class RedOpType(Enum): ...
class BroadcastOptions:
rootRank: int
rootTensor: int
timeout: timedelta
asyncOp: bool
class AllreduceOptions:
reduceOp: ReduceOp
timeout: timedelta
class AllreduceCoalescedOptions(AllreduceOptions): ...
class ReduceOptions:
reduceOp: ReduceOp
rootRank: int
rootTensor: int
timeout: timedelta
class AllgatherOptions:
timeout: timedelta
asyncOp: bool
class GatherOptions:
rootRank: int
timeout: timedelta
class ScatterOptions:
rootRank: int
timeout: timedelta
asyncOp: bool
class ReduceScatterOptions:
reduceOp: ReduceOp
timeout: timedelta
asyncOp: bool
class BarrierOptions:
device_ids: list[int]
device: torch.device
timeout: timedelta
class AllToAllOptions:
timeout: timedelta
class Store:
def set(self, key: str, value: str): ...
def get(self, key: str) -> bytes: ...
def add(self, key: str, value: int) -> int: ...
def compare_set(
self,
key: str,
expected_value: str,
desired_value: str,
) -> bytes: ...
def delete_key(self, key: str) -> bool: ...
def num_keys(self) -> int: ...
def set_timeout(self, timeout: timedelta): ...
@overload
def wait(self, keys: list[str]): ...
@overload
def wait(self, keys: list[str], timeout: timedelta): ...
class FileStore(Store):
def __init__(self, path: str, numWorkers: int = ...) -> None: ...
class HashStore(Store):
def __init__(self) -> None: ...
class TCPStore(Store):
def __init__(
self,
host_name: str,
port: int,
world_size: int | None = ...,
is_master: bool = ...,
timeout: timedelta = ...,
wait_for_workers: bool = ...,
multi_tenant: bool = ...,
master_listen_fd: int | None = ...,
use_libuv: bool | None = ...,
) -> None: ...
@property
def host(self) -> str: ...
@property
def port(self) -> int: ...
class PrefixStore(Store):
def __init__(self, prefix: str, store: Store) -> None: ...
@property
def underlying_store(self) -> Store: ...
class _ControlCollectives:
def barrier(self, key: str, timeout: timedelta, blocking: bool) -> None: ...
def broadcast_send(self, key: str, data: str, timeout: timedelta) -> None: ...
def broadcast_recv(self, key: str, timeout: timedelta) -> str: ...
def gather_send(self, key: str, data: str, timeout: timedelta) -> None: ...
def gather_recv(self, key: str, timeout: timedelta) -> str: ...
def scatter_send(self, key: str, data: str, timeout: timedelta) -> None: ...
def scatter_recv(self, key: str, timeout: timedelta) -> str: ...
def all_gather(self, key: str, data: str, timeout: timedelta) -> str: ...
def all_sum(self, key: str, data: int, timeout: timedelta) -> int: ...
class _StoreCollectives(_ControlCollectives):
def __init__(self, store: Store, rank: int, world_size: int) -> None: ...
class _DistributedBackendOptions:
def __init__(self) -> None: ...
@property
def store(self) -> Store: ...
@store.setter
def store(self, store: Store) -> None: ...
@property
def group_rank(self) -> int: ...
@group_rank.setter
def group_rank(self, rank: int) -> None: ...
@property
def group_size(self) -> int: ...
@group_size.setter
def group_size(self, size: int) -> None: ...
@property
def timeout(self) -> timedelta: ...
@timeout.setter
def timeout(self, timeout: timedelta) -> None: ...
@property
def group_id(self) -> str: ...
@group_id.setter
def group_id(self, group_id: str) -> None: ...
@property
def global_ranks_in_group(self) -> list[int]: ...
@global_ranks_in_group.setter
def global_ranks_in_group(self, ranks: list[int]) -> None: ...
class Work:
def is_completed(self) -> bool: ...
def is_success(self) -> bool: ...
def exception(self) -> Any: ...
def wait(self, timeout: timedelta = ...) -> bool: ...
def get_future(self) -> Future: ...
def source_rank(self) -> int: ...
def _source_rank(self) -> int: ...
def result(self) -> list[Tensor]: ...
def synchronize(self): ...
def boxed(self) -> ScriptObject: ...
@staticmethod
def unbox(obj: ScriptObject) -> Work: ...
class Backend:
class Options:
def __init__(self, backend: str, timeout: timedelta = ...) -> None: ...
@property
def backend(self) -> str: ...
@property
def _timeout(self) -> timedelta: ...
@_timeout.setter
def _timeout(self, val: timedelta) -> None: ...
def __init__(
self,
rank: int,
size: int,
) -> None: ...
@property
def supports_splitting(self) -> bool: ...
@property
def options(self) -> Options: ...
def rank(self) -> int: ...
def size(self) -> int: ...
def eager_connect_single_device(self, device: torch.device | None) -> None: ...
def _set_sequence_number_for_group(self) -> None: ...
def _set_default_timeout(self, timeout: timedelta) -> None: ...
class ProcessGroup:
class Options:
def __init__(self, backend: str, timeout: timedelta = ...) -> None: ...
@property
def backend(self) -> str: ...
@property
def _timeout(self) -> timedelta: ...
@_timeout.setter
def _timeout(self, val: timedelta) -> None: ...
class BackendType(Enum):
UNDEFINED = ...
GLOO = ...
NCCL = ...
UCC = ...
MPI = ...
CUSTOM = ...
def __init__(
self,
store: Store,
rank: int,
size: int,
options: Options,
) -> None: ...
def rank(self) -> int: ...
def size(self) -> int: ...
@overload
def broadcast(
self,
tensors: list[Tensor],
opts=...,
) -> Work: ...
@overload
def broadcast(
self,
tensor: Tensor,
root: int,
) -> Work: ...
@overload
def allreduce(
self,
tensors: list[Tensor],
opts: AllreduceOptions = ...,
) -> Work: ...
@overload
def allreduce(
self,
tensors: list[Tensor],
op=...,
) -> Work: ...
@overload
def allreduce(
self,
tensor: Tensor,
op=...,
) -> Work: ...
def allreduce_coalesced(
self,
tensors: list[Tensor],
opts=...,
) -> Work: ...
def reduce_scatter_tensor_coalesced(
self,
outputTensors: list[Tensor],
inputTensors: list[Tensor],
opts: ReduceScatterOptions | None = None,
) -> Work: ...
@overload
def reduce(
self,
tensors: list[Tensor],
opts=...,
) -> Work: ...
@overload
def reduce(
self,
tensor: Tensor,
root: int,
op=...,
) -> Work: ...
@overload
def allgather(
self,
output_tensors: list[list[Tensor]],
input_tensors: list[Tensor],
opts=...,
) -> Work: ...
@overload
def allgather(
self,
output_tensors: list[Tensor],
input_tensor: Tensor,
) -> Work: ...
def _allgather_base(
self,
output: Tensor,
input: Tensor,
opts=...,
) -> Work: ...
def allgather_coalesced(
self,
output_lists: list[list[Tensor]],
input_list: list[Tensor],
opts=...,
) -> Work: ...
def allgather_into_tensor_coalesced(
self,
output_lists: list[Tensor],
input_list: list[Tensor],
opts=...,
) -> Work: ...
@overload
def gather(
self,
output_tensors: list[list[Tensor]],
input_tensors: list[Tensor],
opts=...,
) -> Work: ...
@overload
def gather(
self,
output_tensors: list[Tensor],
input_tensor: Tensor,
root: int,
) -> Work: ...
@overload
def scatter(
self,
output_tensors: list[Tensor],
input_tensors: list[list[Tensor]],
opts=...,
) -> Work: ...
@overload
def scatter(
self,
output_tensor: Tensor,
input_tensors: list[Tensor],
root: int,
) -> Work: ...
@overload
def reduce_scatter(
self,
output_tensors: list[Tensor],
input_tensors: list[list[Tensor]],
opts=...,
) -> Work: ...
@overload
def reduce_scatter(
self,
output_tensors: Tensor,
input_tensor: list[Tensor],
) -> Work: ...
def _reduce_scatter_base(
self,
outputTensor: Tensor,
inputTensor: Tensor,
opts: ReduceScatterOptions | None,
) -> Work: ...
@overload
def alltoall_base(
self,
output_tensor: Tensor,
input_tensor: Tensor,
output_split_sizes: list[int],
input_split_sizes: list[int],
opts=...,
) -> Work: ...
@overload
def alltoall_base(
self,
output: Tensor,
input: Tensor,
output_split_sizes: list[int],
input_split_sizes: list[int],
) -> Work: ...
@overload
def alltoall(
self,
output_tensor: list[Tensor],
input_tensor: list[Tensor],
opts=...,
) -> Work: ...
@overload
def alltoall(
self,
output: list[Tensor],
input: list[Tensor],
) -> Work: ...
def send(
self,
tensors: list[Tensor],
dstRank: int,
tag: int,
) -> Work: ...
def recv(
self,
tensors: list[Tensor],
srcRank: int,
tag: int,
) -> Work: ...
def recv_anysource(self, tensors: list[Tensor], tag: int) -> Work: ...
def barrier(self, opts=...) -> Work: ...
def boxed(self) -> ScriptObject: ...
@staticmethod
def unbox(obj: ScriptObject) -> ProcessGroup: ...
def _start_coalescing(self, device: torch.device) -> None: ...
def _end_coalescing(self, device: torch.device) -> Work: ...
def _get_backend_name(self) -> str: ...
def _backend_id(self, backend_type: BackendType) -> int: ...
@property
def _device_types(self) -> list[torch.device]: ...
def _get_backend(self, device: torch.device) -> Backend: ...
def _register_backend(
self,
device: torch.device,
backend_type: BackendType,
backend: Backend | None,
) -> None: ...
def _set_group_name(self, name: str) -> None: ...
def _set_group_desc(self, desc: str) -> None: ...
def name(self) -> str: ...
def _has_hooks(self) -> bool: ...
def _wait_for_pending_works(self) -> None: ...
def _set_sequence_number_for_group(self) -> None: ...
@property
def bound_device_id(self) -> torch.device | None: ...
@bound_device_id.setter
def bound_device_id(self, device: torch.device | None) -> None: ...
@property
def group_name(self) -> str: ...
@property
def group_desc(self) -> str: ...
class ProcessGroupGloo(Backend):
class Device: ...
class Options(ProcessGroup.Options):
devices: list[ProcessGroupGloo.Device]
threads: int
def __init__(self): ...
def __init__(
self,
store: Store,
rank: int,
size: int,
timeout: timedelta,
) -> None: ...
@staticmethod
def create_device(hostname="", interface="") -> Device: ...
@staticmethod
def create_default_device() -> Device: ...
def _set_default_timeout(self, timeout) -> None: ...
class _ProcessGroupWrapper(Backend):
def __init__(self, pg: Backend, gloo_pg: ProcessGroupGloo) -> None: ...
wrapped_pg: Backend
class ProcessGroupNCCL(Backend):
class NCCLConfig:
blocking: int
cga_cluster_size: int
min_ctas: int
max_ctas: int
class Options(ProcessGroup.Options):
config: ProcessGroupNCCL.NCCLConfig
is_high_priority_stream: bool
split_from: ProcessGroupNCCL
split_color: int
global_ranks_in_group: list[int]
group_name: str
def __init__(self, is_high_priority_stream: bool = False): ...
def __init__(
self,
store: Store,
rank: int,
size: int,
options: Options,
) -> None: ...
def _group_start(self) -> None: ...
def _group_end(self) -> None: ...
def _set_default_timeout(self, timeout) -> None: ...
def _shutdown(self) -> None: ...
def perform_nocolor_split(self, device: torch.device) -> None: ...
def comm_split_count(self) -> int: ...
def _add_ephemeral_timeout(self, timeout: timedelta) -> None: ...
@property
def uid(self) -> int: ...
@property
def options(self) -> Options: ... # type: ignore[override]
class ProcessGroupUCC(Backend):
def __init__(
self,
store: Store,
rank: int,
size: int,
timeout: timedelta,
) -> None: ...
class ProcessGroupMPI(Backend):
def __init__(
self,
rank: int,
size: int,
pgComm: int,
) -> None: ...
@staticmethod
def create(ranks: list[int]) -> ProcessGroupMPI: ...
def _compute_bucket_assignment_by_size(
tensors: list[Tensor],
bucket_size_limits: list[int],
expect_sparse_gradient: list[bool] = ...,
tensor_indices: list[int] = ...,
) -> tuple[list[list[int]], list[int]]: ...
def _broadcast_coalesced(
process_group: ProcessGroup,
tensors: list[Tensor],
buffer_size: int,
src: int,
): ...
def _test_python_store(store: Store): ...
def _verify_params_across_processes(
process_group: ProcessGroup,
params: list[Tensor],
logger: Logger | None,
): ...
def _make_nccl_premul_sum(factor: float | list[Tensor]) -> ReduceOp: ...
def _register_process_group(
group_name: str,
process_group: ProcessGroup,
) -> None: ...
def _resolve_process_group(group_name: str) -> ProcessGroup: ...
def _register_work(tensor: torch.Tensor, work: Work) -> ProcessGroup: ...
def _unregister_all_process_groups() -> None: ...
def _unregister_process_group(group_name: str) -> None: ...
class _SymmetricMemory:
@staticmethod
def set_group_info(
group_name: str,
rank: int,
world_size: int,
store: Store,
) -> None: ...
@staticmethod
def empty_strided_p2p(
size: torch.types._size,
stride: torch.types._size,
dtype: torch.dtype,
device: torch.device,
group_name: str,
) -> torch.Tensor: ...
@property
def rank(self) -> int: ...
@property
def world_size(self) -> int: ...
@staticmethod
def rendezvous(tensor: torch.Tensor) -> _SymmetricMemory: ...
def get_buffer(
self,
rank: int,
sizes: torch.types._size,
dtype: torch.dtype,
storage_offset: int | None = 0,
) -> torch.Tensor: ...
def barrier(self, channel: int = 0) -> None: ...
def put_signal(self, dst_rank: int, channel: int = 0) -> None: ...
def wait_signal(self, src_rank: int, channel: int = 0) -> None: ...
class ProcessGroupCudaP2P(Backend):
class Options:
nccl_options: Optional[ProcessGroupNCCL.Options]
buffer_size: Optional[int]
def __init__(self) -> None: ...
def __init__(
self,
store: Store,
rank: int,
size: int,
options: ProcessGroupCudaP2P.Options,
) -> None: ...
def is_p2p_available(self) -> bool: ...
def get_buffer_size(self) -> int: ...
def stream(self) -> torch.cuda.Stream: ...
def intra_node_barrier(self) -> Work: ...
def get_p2p_buffer(
self,
rank: int,
sizes: torch.Size,
dtype: torch.dtype,
storage_offset: Optional[int] = 0,
) -> torch.Tensor: ...
def _shutdown(self) -> None: ...
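
Everyday use of these types goes through the torch.distributed frontend; a single-process gloo sketch:

import torch
import torch.distributed as dist

store = dist.TCPStore("127.0.0.1", 29500, world_size=1, is_master=True)
dist.init_process_group("gloo", store=store, rank=0, world_size=1)
t = torch.ones(4)
dist.all_reduce(t, op=dist.ReduceOp.SUM)   # dispatches to ProcessGroup.allreduce
dist.destroy_process_group()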


@@ -0,0 +1,188 @@
# mypy: allow-untyped-defs
# mypy: disable-error-code="type-arg"
from datetime import timedelta
from typing import Any, Generic, overload, TypeVar
import torch
from torch._C import Future
from torch._C._autograd import ProfilerEvent
from torch._C._distributed_c10d import Store
from torch._C._profiler import ProfilerConfig
# This module is defined in torch/csrc/distributed/rpc/init.cpp
_DEFAULT_INIT_METHOD: str
_DEFAULT_NUM_WORKER_THREADS: int
_UNSET_RPC_TIMEOUT: float
_DEFAULT_RPC_TIMEOUT_SEC: float
_T = TypeVar("_T")
class RpcBackendOptions:
rpc_timeout: float
init_method: str
def __init__(
self,
rpc_timeout: float = ...,
init_method: str = ...,
) -> None: ...
class WorkerInfo:
def __init__(self, name: str, worker_id: int) -> None: ...
@property
def name(self) -> str: ...
@property
def id(self) -> int: ...
def __eq__(self, other: object) -> bool: ...
class RpcAgent:
def join(self, shutdown: bool = False, timeout: float = 0): ...
def sync(self): ...
def shutdown(self): ...
@overload
def get_worker_info(self) -> WorkerInfo: ...
@overload
def get_worker_info(self, workerName: str) -> WorkerInfo: ...
def get_worker_infos(self) -> list[WorkerInfo]: ...
def _get_device_map(self, dst: WorkerInfo) -> dict[torch.device, torch.device]: ...
def get_debug_info(self) -> dict[str, str]: ...
def get_metrics(self) -> dict[str, str]: ...
class PyRRef(Generic[_T]):
def __init__(self, value: _T, type_hint: Any = None) -> None: ...
def is_owner(self) -> bool: ...
def confirmed_by_owner(self) -> bool: ...
def owner(self) -> WorkerInfo: ...
def owner_name(self) -> str: ...
def to_here(self, timeout: float = ...) -> _T: ...
def local_value(self) -> Any: ...
def rpc_sync(self, timeout: float = ...) -> Any: ...
def rpc_async(self, timeout: float = ...) -> Any: ...
def remote(self, timeout: float = ...) -> Any: ...
def _serialize(self) -> tuple: ...
@staticmethod
def _deserialize(tp: tuple) -> PyRRef: ...
def _get_type(self) -> type[_T]: ...
def _get_future(self) -> Future[_T]: ...
def _get_profiling_future(self) -> Future[_T]: ...
def _set_profiling_future(self, profilingFuture: Future[_T]): ...
class _TensorPipeRpcBackendOptionsBase(RpcBackendOptions):
num_worker_threads: int
device_maps: dict[str, dict[torch.device, torch.device]]
devices: list[torch.device]
def __init__(
self,
num_worker_threads: int,
_transports: list | None,
_channels: list | None,
rpc_timeout: float = ...,
init_method: str = ...,
device_maps: dict[str, dict[torch.device, torch.device]] = {}, # noqa: B006
devices: list[torch.device] = [], # noqa: B006
) -> None: ...
def _set_device_map(
self,
to: str,
device_map: dict[torch.device, torch.device],
): ...
class TensorPipeAgent(RpcAgent):
def __init__(
self,
store: Store,
name: str,
worker_id: int,
world_size: int | None,
opts: _TensorPipeRpcBackendOptionsBase,
reverse_device_maps: dict[str, dict[torch.device, torch.device]],
devices: list[torch.device],
) -> None: ...
def join(self, shutdown: bool = False, timeout: float = 0): ...
def shutdown(self): ...
@overload
def get_worker_info(self) -> WorkerInfo: ...
@overload
def get_worker_info(self, workerName: str) -> WorkerInfo: ...
@overload
def get_worker_info(self, id: int) -> WorkerInfo: ...
def get_worker_infos(self) -> list[WorkerInfo]: ...
def _get_device_map(self, dst: WorkerInfo) -> dict[torch.device, torch.device]: ...
def _update_group_membership(
self,
worker_info: WorkerInfo,
my_devices: list[torch.device],
reverse_device_map: dict[str, dict[torch.device, torch.device]],
is_join: bool,
): ...
def _get_backend_options(self) -> _TensorPipeRpcBackendOptionsBase: ...
@property
def is_static_group(self) -> bool: ...
@property
def store(self) -> Store: ...
def _is_current_rpc_agent_set() -> bool: ...
def _get_current_rpc_agent() -> RpcAgent: ...
def _set_and_start_rpc_agent(agent: RpcAgent): ...
def _reset_current_rpc_agent(): ...
def _delete_all_user_and_unforked_owner_rrefs(timeout: timedelta = ...): ...
def _destroy_rref_context(ignoreRRefLeak: bool): ...
def _rref_context_get_debug_info() -> dict[str, str]: ...
def _cleanup_python_rpc_handler(): ...
def _invoke_rpc_builtin(
dst: WorkerInfo,
opName: str,
rpcTimeoutSeconds: float,
*args: Any,
**kwargs: Any,
): ...
def _invoke_rpc_python_udf(
dst: WorkerInfo,
pickledPythonUDF: str,
tensors: list[torch.Tensor],
rpcTimeoutSeconds: float,
isAsyncExecution: bool,
): ...
def _invoke_rpc_torchscript(
dstWorkerName: str,
qualifiedNameStr: str,
argsTuple: tuple,
kwargsDict: dict,
rpcTimeoutSeconds: float,
isAsyncExecution: bool,
): ...
def _invoke_remote_builtin(
dst: WorkerInfo,
opName: str,
rpcTimeoutSeconds: float,
*args: Any,
**kwargs: Any,
): ...
def _invoke_remote_python_udf(
dst: WorkerInfo,
pickledPythonUDF: str,
tensors: list[torch.Tensor],
rpcTimeoutSeconds: float,
isAsyncExecution: bool,
): ...
def _invoke_remote_torchscript(
dstWorkerName: WorkerInfo,
qualifiedNameStr: str,
rpcTimeoutSeconds: float,
isAsyncExecution: bool,
*args: Any,
**kwargs: Any,
): ...
def get_rpc_timeout() -> float: ...
def enable_gil_profiling(flag: bool): ...
def _set_rpc_timeout(rpcTimeoutSeconds: float): ...
class RemoteProfilerManager:
@staticmethod
def set_current_profiling_key(key: str): ...
def _enable_server_process_global_profiler(new_config: ProfilerConfig): ...
def _disable_server_process_global_profiler() -> list[list[list[ProfilerEvent]]]: ...
def _set_profiler_node_id(default_node_id: int): ...
def _enable_jit_rref_pickle(): ...
def _disable_jit_rref_pickle(): ...
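
The public wrapper is torch.distributed.rpc; a minimal single-worker sketch:

import os
import torch
import torch.distributed.rpc as rpc

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
rpc.init_rpc("worker0", rank=0, world_size=1)                       # starts a TensorPipeAgent
fut = rpc.rpc_async("worker0", torch.add, args=(torch.ones(2), 1))
print(fut.wait())                                                    # tensor([2., 2.])
rpc.shutdown()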


@@ -0,0 +1,32 @@
import torch
from torch._C._distributed_c10d import Store
from torch._C._distributed_rpc import _TensorPipeRpcBackendOptionsBase, TensorPipeAgent
# This module is defined in torch/csrc/distributed/rpc/testing/init.cpp
class FaultyTensorPipeRpcBackendOptions(_TensorPipeRpcBackendOptionsBase):
def __init__(
self,
num_worker_threads: int,
rpc_timeout: float,
init_method: str,
messages_to_fail: list[str],
messages_to_delay: dict[str, float],
num_fail_sends: int,
) -> None: ...
num_send_recv_threads: int
messages_to_fail: list[str]
messages_to_delay: dict[str, float]
num_fail_sends: int
class FaultyTensorPipeAgent(TensorPipeAgent):
def __init__(
self,
store: Store,
name: str,
rank: int,
world_size: int,
options: FaultyTensorPipeRpcBackendOptions,
reverse_device_maps: dict[str, dict[torch.device, torch.device]],
devices: list[torch.device],
) -> None: ...


@@ -0,0 +1,11 @@
from typing import AnyStr
from torch import Tensor
class UndefinedGrad:
def __init__(self) -> None: ...
def __call__(self, *inputs: Tensor) -> list[Tensor]: ...
class DelayedError:
def __init__(self, msg: AnyStr, num_inputs: int) -> None: ...
def __call__(self, inputs: list[Tensor]) -> list[Tensor]: ...


@@ -0,0 +1,83 @@
# mypy: allow-untyped-defs
from enum import Enum
from torch import Tensor
# Defined in torch/csrc/functorch/init.cpp
def _set_dynamic_layer_keys_included(included: bool) -> None: ...
def get_unwrapped(tensor: Tensor) -> Tensor: ...
def is_batchedtensor(tensor: Tensor) -> bool: ...
def is_functionaltensor(tensor: Tensor) -> bool: ...
def is_functorch_wrapped_tensor(tensor: Tensor) -> bool: ...
def is_gradtrackingtensor(tensor: Tensor) -> bool: ...
def is_legacy_batchedtensor(tensor: Tensor) -> bool: ...
def maybe_get_bdim(tensor: Tensor) -> int: ...
def maybe_get_level(tensor: Tensor) -> int: ...
def maybe_current_level() -> int | None: ...
def unwrap_if_dead(tensor: Tensor) -> Tensor: ...
def _unwrap_for_grad(tensor: Tensor, level: int) -> Tensor: ...
def _wrap_for_grad(tensor: Tensor, level: int) -> Tensor: ...
def _unwrap_batched(tensor: Tensor, level: int) -> tuple[Tensor, int | None]: ...
def current_level() -> int: ...
def count_jvp_interpreters() -> int: ...
def _add_batch_dim(tensor: Tensor, bdim: int, level: int) -> Tensor: ...
def set_single_level_autograd_function_allowed(allowed: bool) -> None: ...
def get_single_level_autograd_function_allowed() -> bool: ...
def _unwrap_functional_tensor(tensor: Tensor, reapply_views: bool) -> Tensor: ...
def _wrap_functional_tensor(tensor: Tensor, level: int) -> Tensor: ...
def _vmap_increment_nesting(batch_size: int, randomness: str) -> int: ...
def _vmap_decrement_nesting() -> int: ...
def _grad_increment_nesting() -> int: ...
def _grad_decrement_nesting() -> int: ...
def _jvp_increment_nesting() -> int: ...
def _jvp_decrement_nesting() -> int: ...
# Defined in aten/src/ATen/functorch/Interpreter.h
class TransformType(Enum):
Torch: TransformType = ...
Vmap: TransformType = ...
Grad: TransformType = ...
Jvp: TransformType = ...
Functionalize: TransformType = ...
class RandomnessType(Enum):
Error: RandomnessType = ...
Same: RandomnessType = ...
Different: RandomnessType = ...
class CInterpreter:
def key(self) -> TransformType: ...
def level(self) -> int: ...
class CGradInterpreterPtr:
def __init__(self, interpreter: CInterpreter) -> None: ...
def lift(self, tensor: Tensor) -> Tensor: ...
def prevGradMode(self) -> bool: ...
class CJvpInterpreterPtr:
def __init__(self, interpreter: CInterpreter) -> None: ...
def lift(self, tensor: Tensor) -> Tensor: ...
def prevFwdGradMode(self) -> bool: ...
class CFunctionalizeInterpreterPtr:
def __init__(self, interpreter: CInterpreter) -> None: ...
def key(self) -> TransformType: ...
def level(self) -> int: ...
def functionalizeAddBackViews(self) -> bool: ...
class CVmapInterpreterPtr:
def __init__(self, interpreter: CInterpreter) -> None: ...
def key(self) -> TransformType: ...
def level(self) -> int: ...
def batchSize(self) -> int: ...
def randomness(self) -> RandomnessType: ...
class DynamicLayer: ...
def get_dynamic_layer_stack_depth() -> int: ...
def get_interpreter_stack() -> list[CInterpreter]: ...
def peek_interpreter_stack() -> CInterpreter: ...
def pop_dynamic_layer_stack() -> DynamicLayer: ...
def pop_dynamic_layer_stack_and_undo_to_depth(depth: int) -> None: ...
def push_dynamic_layer_stack(dl: DynamicLayer) -> int: ...
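
These interpreter bindings back the public torch.func transforms; user code normally never touches them directly:

import torch
from torch.func import grad, vmap

f = lambda x: (x ** 2).sum()
per_sample_grads = vmap(grad(f))(torch.randn(5, 3))   # nests a Grad interpreter inside a Vmap interpreter
print(per_sample_grads.shape)                          # torch.Size([5, 3])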


@@ -0,0 +1,4 @@
# Defined in torch/csrc/instruction_counter/Module.cpp
def start() -> int: ...
def end(id: int) -> int: ...


@@ -0,0 +1,5 @@
# Defined in torch/csrc/itt.cpp
def is_available() -> bool: ...
def rangePush(message: str) -> None: ...
def rangePop() -> None: ...
def mark(message: str) -> None: ...
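
A thin public wrapper is assumed to live at torch.profiler.itt; a hedged sketch:

import torch
from torch.profiler import itt  # assumption: wrapper module location

if itt.is_available():
    itt.range_push("forward")   # -> rangePush
    torch.randn(64, 64).mm(torch.randn(64, 64))
    itt.range_pop()             # -> rangePop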


@@ -0,0 +1,27 @@
# mypy: allow-untyped-defs
from torch import Tensor
# defined in torch/csrc/lazy/python/init.cpp
def _mark_step(device: str, devices: list[str], wait: bool): ...
def _wait_device_ops(devices: list[str]): ...
def _reset_metrics(): ...
def _counter_names() -> list[str]: ...
def _counter_value(name: str) -> int: ...
def _metrics_report() -> str: ...
def _get_graph_hash(tensors: list[Tensor]) -> str: ...
def _sync_multi(
tensors: list[Tensor],
devices: list[str],
wait: bool = True,
sync_ltc_data: bool = True,
): ...
def _get_tensor_id(tensor: Tensor) -> int: ...
def _get_tensors_text(tensors: list[Tensor]) -> str: ...
def _get_tensors_dot(tensors: list[Tensor]) -> str: ...
def _get_tensors_backend(tensors: list[Tensor]) -> str: ...
def _get_force_fallback() -> str: ...
def _set_force_fallback(newval: str): ...
def _clear_ir_cache(): ...
def _dump_ir_cache(filename: str): ...
def _set_reuse_ir(val: bool): ...
def _get_default_device_type(): ...


@@ -0,0 +1,12 @@
# mypy: allow-untyped-defs
# defined in torch/csrc/lazy/python/init.cpp
from typing import Any
from torch import Tensor
def _init(): ...
def _get_tensors_ts_device_data_node(
tensors: list[Tensor],
) -> tuple[list[int], list[Any]]: ...
def _run_cached_graph(hash_str: str, graph_inputs: list[Any]) -> list[Tensor]: ...


@@ -0,0 +1,44 @@
# Defined in torch/csrc/monitor/python_init.cpp
import datetime
from enum import Enum
from typing import Callable
class Aggregation(Enum):
VALUE = ...
MEAN = ...
COUNT = ...
SUM = ...
MAX = ...
MIN = ...
class Stat:
name: str
count: int
def __init__(
self,
name: str,
aggregations: list[Aggregation],
window_size: int,
max_samples: int = -1,
) -> None: ...
def add(self, v: float) -> None: ...
def get(self) -> dict[Aggregation, float]: ...
class Event:
name: str
timestamp: datetime.datetime
data: dict[str, int | float | bool | str]
def __init__(
self,
name: str,
timestamp: datetime.datetime,
data: dict[str, int | float | bool | str],
) -> None: ...
def log_event(e: Event) -> None: ...
class EventHandlerHandle: ...
def register_event_handler(handler: Callable[[Event], None]) -> EventHandlerHandle: ...
def unregister_event_handler(handle: EventHandlerHandle) -> None: ...
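
torch.monitor exposes these classes directly; for example:

from torch.monitor import Aggregation, Stat

loss_stat = Stat("train.loss", [Aggregation.MEAN, Aggregation.MAX], window_size=100)
loss_stat.add(0.50)
loss_stat.add(0.25)
print(loss_stat.get())   # aggregations for the most recently closed window (may be empty early on)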


@@ -0,0 +1,89 @@
# @generated by tools/pyi/gen_pyi.py from torch/_C/_nn.pyi.in
# mypy: disable-error-code="type-arg"
from typing import List, Literal, Optional, overload, Sequence, Tuple, Union
from torch import memory_format, Tensor
from torch.types import _bool, _device, _dtype, _int, _size
# Defined in tools/autograd/templates/python_nn_functions.cpp
def adaptive_max_pool2d(input: Tensor, output_size: Union[_int, _size]) -> Tuple[Tensor, Tensor]: ...
def adaptive_max_pool3d(input: Tensor, output_size: Union[_int, _size]) -> Tuple[Tensor, Tensor]: ...
def avg_pool2d(input: Tensor, kernel_size: Union[_int, _size], stride: Optional[Union[_int, _size]] = None, padding: Union[_int, _size] = 0, ceil_mode: bool = False, count_include_pad: bool = True, divisor_override: Optional[int] = None) -> Tensor: ...
def avg_pool3d(input: Tensor, kernel_size: Union[_int, _size], stride: Optional[Union[_int, _size]] = None, padding: Union[_int, _size] = 0, ceil_mode: bool = False, count_include_pad: bool = True, divisor_override: Optional[int] = None) -> Tensor: ...
def elu_(input: Tensor, alpha: float = ...) -> Tensor: ...
def fractional_max_pool2d(input: Tensor, kernel_size: Union[_int, _size], output_size: Union[_int, _size], _random_samples: Tensor) -> Tuple[Tensor, Tensor]: ...
def fractional_max_pool3d(input: Tensor, kernel_size: Union[_int, _size], output_size: Union[_int, _size], _random_samples: Tensor) -> Tuple[Tensor, Tensor]: ...
def gelu(input: Tensor, approximate: str = ...) -> Tensor: ...
def hardsigmoid(input: Tensor, *, out: Optional[Tensor] = None) -> Tensor: ...
def hardtanh(input: Tensor, min_val: float = ..., max_val: float = ..., *, out: Optional[Tensor] = None) -> Tensor: ...
def hardtanh_(input: Tensor, min_val: float = ..., max_val: float = ...) -> Tensor: ...
def leaky_relu(input: Tensor, negative_slope: float = ..., *, out: Optional[Tensor] = None) -> Tensor: ...
def leaky_relu_(input: Tensor, negative_slope: float = ...) -> Tensor: ...
def linear(input: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tensor: ...
def log_sigmoid(input: Tensor) -> Tensor: ...
def one_hot(tensor: Tensor, num_classes: int = ...) -> Tensor: ...
def pad(input: Tensor, pad: Sequence[int], mode: str = ..., value: Optional[float] = None) -> Tensor: ...
def scaled_dot_product_attention(query: Tensor, key: Tensor, value: Tensor, attn_mask: Optional[Tensor] = None, dropout_p: float = 0.0, is_causal: bool = False, scale: Optional[float] = None, enable_gqa: bool = False) -> Tensor: ...
def softplus(input: Tensor, beta: float = ..., threshold: float = ...) -> Tensor: ...
def softshrink(input: Tensor, lambd: float = ...) -> Tensor: ...
# Defined in aten/src/ATen/native/mkldnn/Linear.cpp
def mkldnn_linear(input: Tensor, weight: Tensor, bias: Optional[Tensor]) -> Tensor: ...
# Defined at aten/src/ATen/native/mkldnn/MKLDNNConversions.cpp
def mkldnn_reorder_conv2d_weight(
self: Tensor,
padding: List,
stride: List,
dilation: List,
groups: int,
) -> Tensor: ...
def mkldnn_reorder_conv3d_weight(
self: Tensor,
padding: List,
stride: List,
dilation: List,
groups: int,
) -> Tensor: ...
# Defined in aten/src/ATen/native/mkldnn/Prelu.cpp
def mkldnn_prelu(input: Tensor, weight: Tensor) -> Tensor: ...
# Defined at tools/autograd/templates/python_nn_functions.cpp
@overload
def _parse_to(
device: _device,
dtype: _dtype,
non_blocking: _bool,
copy: _bool,
*,
memory_format: memory_format,
) -> Tuple[_device, _dtype, _bool, memory_format]: ...
@overload
def _parse_to(
dtype: _dtype,
non_blocking: _bool,
copy: _bool,
*,
memory_format: memory_format,
) -> Tuple[_device, _dtype, _bool, memory_format]: ...
@overload
def _parse_to(
tensor: Tensor,
non_blocking: _bool,
copy: _bool,
*,
memory_format: memory_format,
) -> Tuple[_device, _dtype, _bool, memory_format]: ...
# Defined in aten/src/ATen/native/PackedSequence.cpp
def pad_sequence(
sequences: Union[List[Tensor], Tuple[Tensor, ...]],
batch_first: bool = False,
padding_value: float = 0.0,
padding_side: Union[Literal["left", "right"], str] = "right",
) -> Tensor: ...
def flatten_dense_tensors(tensors: List[Tensor]) -> Tensor: ...
def unflatten_dense_tensors(flat: Tensor, tensors: List[Tensor]) -> List[Tensor]: ...
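
These are the C bindings behind torch.nn.functional; the usual spelling goes through F:

import torch
import torch.nn.functional as F

x = torch.randn(8, 16)
w = torch.randn(4, 16)
print(F.linear(x, w).shape)                              # torch.Size([8, 4])
print(F.one_hot(torch.tensor([0, 2, 1]), num_classes=3))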


@@ -0,0 +1,7 @@
# mypy: allow-untyped-defs
# Defined in torch/csrc/cuda/shared/nvtx.cpp
def rangePushA(message: str) -> int: ...
def rangePop() -> int: ...
def rangeStartA(message: str) -> int: ...
def rangeEnd(range_id: int) -> None: ...
def markA(message: str) -> None: ...
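
The public wrapper is torch.cuda.nvtx (a CUDA build is required for the ranges to reach NVTX):

import torch

torch.cuda.nvtx.range_push("forward")   # -> rangePushA
torch.randn(256, 256).mm(torch.randn(256, 256))
torch.cuda.nvtx.range_pop()             # -> rangePop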


@@ -0,0 +1,39 @@
# Defined in torch/csrc/onnx/init.cpp
from enum import Enum
PRODUCER_VERSION: str
class TensorProtoDataType(Enum):
UNDEFINED = ...
FLOAT = ...
UINT8 = ...
INT8 = ...
UINT16 = ...
INT16 = ...
INT32 = ...
INT64 = ...
STRING = ...
BOOL = ...
FLOAT16 = ...
DOUBLE = ...
UINT32 = ...
UINT64 = ...
COMPLEX64 = ...
COMPLEX128 = ...
BFLOAT16 = ...
FLOAT8E5M2 = ...
FLOAT8E4M3FN = ...
FLOAT8E5M2FNUZ = ...
FLOAT8E4M3FNUZ = ...
class OperatorExportTypes(Enum):
ONNX = ...
ONNX_ATEN = ...
ONNX_ATEN_FALLBACK = ...
ONNX_FALLTHROUGH = ...
class TrainingMode(Enum):
EVAL = ...
PRESERVE = ...
TRAINING = ...
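
These enums surface as torch.onnx.TrainingMode and torch.onnx.OperatorExportTypes; for example:

import torch

model = torch.nn.Linear(4, 2)
torch.onnx.export(
    model,
    (torch.randn(1, 4),),
    "linear.onnx",
    training=torch.onnx.TrainingMode.EVAL,   # TrainingMode declared above
)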


@@ -0,0 +1,244 @@
from enum import Enum
from typing import Any, Literal
from typing_extensions import TypeAlias
from torch._C import device, dtype, layout
# defined in torch/csrc/profiler/python/init.cpp
class RecordScope(Enum):
FUNCTION = ...
BACKWARD_FUNCTION = ...
TORCHSCRIPT_FUNCTION = ...
KERNEL_FUNCTION_DTYPE = ...
CUSTOM_CLASS = ...
BUILD_FEATURE = ...
LITE_INTERPRETER = ...
USER_SCOPE = ...
STATIC_RUNTIME_OP = ...
STATIC_RUNTIME_MODEL = ...
class ProfilerState(Enum):
Disable = ...
CPU = ...
CUDA = ...
NVTX = ...
ITT = ...
KINETO = ...
KINETO_GPU_FALLBACK = ...
KINETO_PRIVATEUSE1_FALLBACK = ...
KINETO_PRIVATEUSE1 = ...
class ActiveProfilerType(Enum):
NONE = ...
LEGACY = ...
KINETO = ...
NVTX = ...
ITT = ...
class ProfilerActivity(Enum):
CPU = ...
CUDA = ...
XPU = ...
MTIA = ...
PrivateUse1 = ...
class _EventType(Enum):
TorchOp = ...
Backend = ...
Allocation = ...
OutOfMemory = ...
PyCall = ...
PyCCall = ...
Kineto = ...
class _ExperimentalConfig:
def __init__(
self,
profiler_metrics: list[str] = ...,
profiler_measure_per_kernel: bool = ...,
verbose: bool = ...,
performance_events: list[str] = ...,
enable_cuda_sync_events: bool = ...,
) -> None: ...
class ProfilerConfig:
def __init__(
self,
state: ProfilerState,
report_input_shapes: bool,
profile_memory: bool,
with_stack: bool,
with_flops: bool,
with_modules: bool,
experimental_config: _ExperimentalConfig,
) -> None: ...
class _ProfilerEvent:
start_tid: int
start_time_ns: int
children: list[_ProfilerEvent]
# TODO(robieta): remove in favor of `self.typed`
extra_fields: (
_ExtraFields_TorchOp
| _ExtraFields_Backend
| _ExtraFields_Allocation
| _ExtraFields_OutOfMemory
| _ExtraFields_PyCall
| _ExtraFields_PyCCall
| _ExtraFields_Kineto
)
@property
def typed(
self,
) -> (
tuple[Literal[_EventType.TorchOp], _ExtraFields_TorchOp]
| tuple[Literal[_EventType.Backend], _ExtraFields_Backend]
| tuple[Literal[_EventType.Allocation], _ExtraFields_Allocation]
| tuple[Literal[_EventType.OutOfMemory], _ExtraFields_OutOfMemory]
| tuple[Literal[_EventType.PyCall], _ExtraFields_PyCall]
| tuple[Literal[_EventType.PyCCall], _ExtraFields_PyCCall]
| tuple[Literal[_EventType.Kineto], _ExtraFields_Kineto]
): ...
@property
def name(self) -> str: ...
@property
def tag(self) -> _EventType: ...
@property
def id(self) -> int: ...
@property
def parent(self) -> _ProfilerEvent | None: ...
@property
def correlation_id(self) -> int: ...
@property
def end_time_ns(self) -> int: ...
@property
def duration_time_ns(self) -> int: ...
class _TensorMetadata:
impl_ptr: int | None
storage_data_ptr: int | None
id: int | None
@property
def allocation_id(self) -> int | None: ...
@property
def layout(self) -> layout: ...
@property
def device(self) -> device: ...
@property
def dtype(self) -> dtype: ...
@property
def sizes(self) -> list[int]: ...
@property
def strides(self) -> list[int]: ...
Scalar: TypeAlias = int | float | bool | complex
Input: TypeAlias = _TensorMetadata | list[_TensorMetadata] | Scalar | None
class _ExtraFields_TorchOp:
name: str
sequence_number: int
allow_tf32_cublas: bool
@property
def inputs(self) -> list[Input]: ...
@property
def scope(self) -> RecordScope: ...
class _ExtraFields_Backend: ...
class _ExtraFields_Allocation:
ptr: int
id: int | None
alloc_size: int
total_allocated: int
total_reserved: int
@property
def allocation_id(self) -> int | None: ...
@property
def device(self) -> device: ...
class _ExtraFields_OutOfMemory: ...
class _PyFrameState:
line_number: int
function_name: str
@property
def file_name(self) -> str: ...
class _NNModuleInfo:
@property
def self_ptr(self) -> int: ...
@property
def cls_ptr(self) -> int: ...
@property
def cls_name(self) -> str: ...
@property
def parameters(
self,
) -> list[tuple[str, _TensorMetadata, _TensorMetadata | None]]: ...
class _OptimizerInfo:
@property
def parameters(
self,
) -> list[
tuple[
# Parameter
_TensorMetadata,
#
# Gradient (if present during optimizer.step())
_TensorMetadata | None,
#
# Optimizer state for Parameter as (name, tensor) pairs
list[tuple[str, _TensorMetadata]],
]
]: ...
class _ExtraFields_PyCCall:
@property
def caller(self) -> _PyFrameState: ...
class _ExtraFields_PyCall:
@property
def callsite(self) -> _PyFrameState: ...
@property
def caller(self) -> _PyFrameState: ...
@property
def module(self) -> _NNModuleInfo | None: ...
@property
def optimizer(self) -> _OptimizerInfo | None: ...
class _ExtraFields_Kineto: ...
def _add_execution_trace_observer(output_file_path: str) -> bool: ...
def _remove_execution_trace_observer() -> None: ...
def _enable_execution_trace_observer() -> None: ...
def _disable_execution_trace_observer() -> None: ...
def _set_record_concrete_inputs_enabled_val(val: bool) -> None: ...
def _set_fwd_bwd_enabled_val(val: bool) -> None: ...
def _set_cuda_sync_enabled_val(val: bool) -> None: ...
class CapturedTraceback: ...
def gather_traceback(python: bool, script: bool, cpp: bool) -> CapturedTraceback: ...
# The Dict has name, filename, line
def symbolize_tracebacks(
to_symbolize: list[CapturedTraceback],
) -> list[list[dict[str, str]]]: ...
class _RecordFunctionFast:
def __init__(
self,
name: str,
input_values: list | tuple | None = None,
keyword_values: dict | None = None,
) -> None: ...
def __enter__(self) -> None: ...
def __exit__(self, *args: Any) -> None: ...
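
The ergonomic counterpart of _RecordFunctionFast is the public torch.profiler.record_function context manager:

import torch
from torch.profiler import profile, record_function

with profile() as prof:
    with record_function("my_block"):        # appears as a user-scope event in the trace
        torch.randn(128, 128).mm(torch.randn(128, 128))

print(prof.key_averages().table(row_limit=5))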


@@ -0,0 +1,3 @@
# Defined in torch/csrc/utils/verbose.cpp
def mkl_set_verbose(enable: int) -> int: ...
def mkldnn_set_verbose(level: int) -> int: ...
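
The public wrappers are context managers under torch.backends; e.g. oneDNN (mkldnn) verbosity:

import torch

with torch.backends.mkldnn.verbose(torch.backends.mkldnn.VERBOSE_ON):
    torch.randn(64, 64).mm(torch.randn(64, 64))   # emits oneDNN verbose logs while enabled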