Reinforced-Learning-Godot/rl/Lib/site-packages/gymnasium/experimental/wrappers/lambda_action.py

"""A collection of wrappers that all use the LambdaAction class.

* ``LambdaActionV0`` - Transforms the actions based on a function
* ``ClipActionV0`` - Clips the action to the bounds of the environment's action space
* ``RescaleActionV0`` - Rescales the action from a given minimum and maximum action to the environment's action bounds
"""
from __future__ import annotations

from typing import Callable

import numpy as np

import gymnasium as gym
from gymnasium.core import ActType, ObsType, WrapperActType
from gymnasium.spaces import Box, Space


__all__ = ["LambdaActionV0", "ClipActionV0", "RescaleActionV0"]


class LambdaActionV0(
    gym.ActionWrapper[ObsType, WrapperActType, ActType], gym.utils.RecordConstructorArgs
):
    """Action wrapper that passes every action through a user-supplied function before :meth:`step`."""

    def __init__(
        self,
        env: gym.Env[ObsType, ActType],
        func: Callable[[WrapperActType], ActType],
        action_space: Space[WrapperActType] | None,
    ):
        """Set up the wrapper around ``env``.

        Args:
            env: Environment whose incoming actions are transformed.
            func: Callable that maps the action given to the wrapper onto the action
                forwarded to ``env``.
            action_space: Action space the wrapper should expose. When ``None``, no
                override is installed by this constructor.
        """
        # Record the constructor arguments before the wrapper itself is initialised.
        gym.utils.RecordConstructorArgs.__init__(
            self, func=func, action_space=action_space
        )
        gym.Wrapper.__init__(self, env=env)

        self.func = func

        # Only override the action space when one was explicitly supplied.
        if action_space is not None:
            self.action_space = action_space

    def action(self, action: WrapperActType) -> ActType:
        """Return ``action`` after it has been transformed by the stored function."""
        transformed = self.func(action)
        return transformed


class ClipActionV0(
    LambdaActionV0[ObsType, WrapperActType, ActType], gym.utils.RecordConstructorArgs
):
    """Clip continuous actions to the bounds of the wrapped environment's :class:`Box` action space.

    Example:
        >>> import gymnasium as gym
        >>> from gymnasium.experimental.wrappers import ClipActionV0
        >>> import numpy as np
        >>> env = gym.make("Hopper-v4", disable_env_checker=True)
        >>> env = ClipActionV0(env)
        >>> env.action_space
        Box(-inf, inf, (3,), float32)
        >>> _ = env.reset(seed=42)
        >>> _ = env.step(np.array([5.0, -2.0, 0.0], dtype=np.float32))
        ... # Executes the action np.array([1.0, -1.0, 0]) in the base environment
    """

    def __init__(self, env: gym.Env[ObsType, ActType]):
        """Wrap ``env`` so that actions are clipped to its action-space bounds.

        Args:
            env: The environment to wrap; its action space must be a :class:`Box`.
        """
        assert isinstance(env.action_space, Box)

        gym.utils.RecordConstructorArgs.__init__(self)

        # The wrapper advertises an unbounded space with the same shape and dtype,
        # since any value is acceptable once clipping is applied.
        unbounded_space = Box(
            low=-np.inf,
            high=np.inf,
            shape=env.action_space.shape,
            dtype=env.action_space.dtype,
        )

        LambdaActionV0.__init__(
            self,
            env=env,
            # The bounds are read from ``env.action_space`` each time an action is clipped.
            func=lambda act: np.clip(
                act, env.action_space.low, env.action_space.high
            ),
            action_space=unbounded_space,
        )


class RescaleActionV0(
    LambdaActionV0[ObsType, WrapperActType, ActType], gym.utils.RecordConstructorArgs
):
    """Affinely rescales the continuous action space of the environment to the range [min_action, max_action].

    The base environment :attr:`env` must have an action space of type :class:`spaces.Box` with finite bounds.
    If :attr:`min_action` or :attr:`max_action` are numpy arrays, the shape must match the shape of the
    environment's action space. Every element of :attr:`min_action` must be strictly smaller than the
    corresponding element of :attr:`max_action`.

    Example:
        >>> import gymnasium as gym
        >>> from gymnasium.experimental.wrappers import RescaleActionV0
        >>> import numpy as np
        >>> env = gym.make("Hopper-v4", disable_env_checker=True)
        >>> _ = env.reset(seed=42)
        >>> obs, _, _, _, _ = env.step(np.array([1, 1, 1], dtype=np.float32))
        >>> _ = env.reset(seed=42)
        >>> min_action = -0.5
        >>> max_action = np.array([0.0, 0.5, 0.75], dtype=np.float32)
        >>> wrapped_env = RescaleActionV0(env, min_action=min_action, max_action=max_action)
        >>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
        >>> np.all(obs == wrapped_env_obs)
        True
    """

    def __init__(
        self,
        env: gym.Env[ObsType, ActType],
        min_action: float | int | np.ndarray,
        max_action: float | int | np.ndarray,
    ):
        """Constructor for the Rescale Action wrapper.

        Args:
            env (Env): The environment to wrap
            min_action (float, int or np.ndarray): The min values for each action. This may be a numpy array or a scalar.
            max_action (float, int or np.ndarray): The max values for each action. This may be a numpy array or a scalar.

        Raises:
            AssertionError: If the environment's action space is not a :class:`Box` with finite bounds,
                if a bound has the wrong type or shape or is not finite, or if ``min_action`` is not
                strictly smaller than ``max_action`` element-wise.
        """
        gym.utils.RecordConstructorArgs.__init__(
            self, min_action=min_action, max_action=max_action
        )

        assert isinstance(
            env.action_space, Box
        ), f"expected a Box action space, got {type(env.action_space)}"
        env_low = env.action_space.low
        env_high = env.action_space.high
        # ``isfinite`` rejects -inf as well as +inf; an unbounded side in either
        # direction makes the affine mapping below undefined.
        assert np.all(np.isfinite(env_low)) and np.all(
            np.isfinite(env_high)
        ), "the environment's action space bounds must be finite"

        space_shape = env.action_space.shape
        min_action = RescaleActionV0._as_bound_array(
            min_action, space_shape, "min_action"
        )
        max_action = RescaleActionV0._as_bound_array(
            max_action, space_shape, "max_action"
        )

        # Strict inequality: equal bounds would make ``max_action - min_action`` zero
        # and the gradient below a division by zero.
        assert np.all(
            np.less(min_action, max_action)
        ), "min_action must be strictly smaller than max_action"

        # Imagine the x-axis being the new Box and the y-axis being the old Box:
        # ``min_action`` maps onto ``env_low`` and ``max_action`` onto ``env_high``.
        gradient = (env_high - env_low) / (max_action - min_action)
        intercept = gradient * -min_action + env_low

        LambdaActionV0.__init__(
            self,
            env=env,
            func=lambda action: gradient * action + intercept,
            action_space=Box(
                low=min_action,
                high=max_action,
                shape=space_shape,
                dtype=env.action_space.dtype,
            ),
        )

    @staticmethod
    def _as_bound_array(
        bound: float | int | np.ndarray, shape: tuple[int, ...], label: str
    ) -> np.ndarray:
        """Return ``bound`` as a finite array of ``shape``, expanding scalars with ``np.full``."""
        if not isinstance(bound, np.ndarray):
            assert np.issubdtype(type(bound), np.integer) or np.issubdtype(
                type(bound), np.floating
            ), f"{label} must be an int, a float or a numpy array, got {type(bound)}"
            bound = np.full(shape, bound)

        assert (
            bound.shape == shape
        ), f"{label} has shape {bound.shape}, expected {shape}"
        assert np.all(np.isfinite(bound)), f"{label} must be finite"
        return bound