I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions
--- a/rl/Lib/site-packages/gymnasium/utils/init.py
+++ b/rl/Lib/site-packages/gymnasium/utils/init.py
@ -0,0 +1,14 @@
+"""A set of common utilities used within the environments.
+
+These are not intended as API functions, and will not remain stable over time.
+"""
+
+# These submodules should not have any import-time dependencies.
+# We want this since we use `utils` during our import-time sanity checks
+# that verify that our dependencies are actually present.
+from gymnasium.utils.colorize import colorize
+from gymnasium.utils.ezpickle import EzPickle
+from gymnasium.utils.record_constructor import RecordConstructorArgs
+
+
+# Names exposed by `from gymnasium.utils import *`; these are exactly the three
+# helpers imported above.
+__all__ = ["colorize", "EzPickle", "RecordConstructorArgs"]
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/init.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/init.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/colorize.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/colorize.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/env_checker.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/env_checker.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/env_match.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/env_match.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/ezpickle.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/ezpickle.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/passive_env_checker.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/passive_env_checker.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/performance.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/performance.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/play.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/play.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/record_constructor.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/record_constructor.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/save_video.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/save_video.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/seeding.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/seeding.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/pycache/step_api_compatibility.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/utils/pycache/step_api_compatibility.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/utils/colorize.py
+++ b/rl/Lib/site-packages/gymnasium/utils/colorize.py
@ -0,0 +1,41 @@
+"""A set of common utilities used within the environments.
+
+These are not intended as API functions, and will not remain stable over time.
+"""
+
+# Colour name -> numeric code that `colorize` writes into the terminal escape sequence.
+color2num = {
+    "gray": 30,
+    "red": 31,
+    "green": 32,
+    "yellow": 33,
+    "blue": 34,
+    "magenta": 35,
+    "cyan": 36,
+    "white": 37,
+    "crimson": 38,
+}
+
+
+def colorize(
+    string: str, color: str, bold: bool = False, highlight: bool = False
+) -> str:
+    """Wrap ``string`` in terminal escape codes so it prints in the requested colour.
+
+    Args:
+        string: The message to colourise
+        color: A key of ``color2num``: gray, red, green, yellow, blue, magenta, cyan, white or crimson
+        bold: If true, the bold attribute ("1") is appended to the colour code
+        highlight: If true, 10 is added to the colour code before it is emitted
+
+    Returns:
+        ``string`` prefixed with ``ESC[<codes>m`` and followed by the reset sequence ``ESC[0m``
+
+    Raises:
+        KeyError: If ``color`` is not a key of ``color2num``
+    """
+    code = color2num[color] + (10 if highlight else 0)
+    codes = [str(code)]
+    if bold:
+        codes.append("1")
+    sgr = ";".join(codes)
+    return f"\x1b[{sgr}m{string}\x1b[0m"
--- a/rl/Lib/site-packages/gymnasium/utils/env_checker.py
+++ b/rl/Lib/site-packages/gymnasium/utils/env_checker.py
@ -0,0 +1,333 @@
+"""A set of functions for checking an environment details.
+
+This file is originally from the Stable Baselines3 repository hosted on GitHub
+(https://github.com/DLR-RM/stable-baselines3/)
+Original Author: Antonin Raffin
+
+It also uses some warnings/assertions from the PettingZoo repository hosted on GitHub
+(https://github.com/PettingZoo-Team/PettingZoo)
+Original Author: J K Terry
+
+This was rewritten and split into "env_checker.py" and "passive_env_checker.py" for invasive and passive environment checking
+Original Author: Mark Towers
+
+These projects are covered by the MIT License.
+"""
+
+import inspect
+from copy import deepcopy
+
+import numpy as np
+
+import gymnasium as gym
+from gymnasium import logger, spaces
+from gymnasium.utils.passive_env_checker import (
+    check_action_space,
+    check_observation_space,
+    env_render_passive_checker,
+    env_reset_passive_checker,
+    env_step_passive_checker,
+)
+
+
+def data_equivalence(data_1, data_2) -> bool:
+    """Recursively compare two data structures, e.g. observations, actions or infos.
+
+    Both values must have exactly the same type. Dicts are compared key by key,
+    tuples and lists element by element, object-dtype arrays entry by entry, and
+    other NumPy arrays with ``np.allclose`` (``atol=0.00001``) after their shape
+    and dtype are checked. Everything else is compared with ``==``.
+
+    Args:
+        data_1: data structure 1
+        data_2: data structure 2
+
+    Returns:
+        If data 1 and 2 are equivalent
+    """
+    # Differing types are never equivalent (so e.g. 1 and 1.0 do not match).
+    if type(data_1) != type(data_2):
+        return False
+
+    if isinstance(data_1, dict):
+        if data_1.keys() != data_2.keys():
+            return False
+        return all(data_equivalence(data_1[key], data_2[key]) for key in data_1)
+
+    if isinstance(data_1, (tuple, list)):
+        if len(data_1) != len(data_2):
+            return False
+        return all(
+            data_equivalence(item_1, item_2) for item_1, item_2 in zip(data_1, data_2)
+        )
+
+    if isinstance(data_1, np.ndarray):
+        if data_1.shape != data_2.shape or data_1.dtype != data_2.dtype:
+            return False
+        if data_1.dtype == object:
+            # Object arrays may hold arbitrary values, so recurse into each entry.
+            return all(
+                data_equivalence(item_1, item_2)
+                for item_1, item_2 in zip(data_1, data_2)
+            )
+        return np.allclose(data_1, data_2, atol=0.00001)
+
+    return data_1 == data_2
+
+
+def check_reset_seed(env: gym.Env):
+    """Check that the environment can be reset with a seed.
+
+    The environment is reset with seed 123 twice and with seed 456 once. Each
+    returned observation must lie in ``env.observation_space``. The unwrapped
+    environment's ``_np_random`` generator must be set, must have the same
+    bit-generator state after both seed-123 resets, and a different state after
+    the seed-456 reset. If ``env.spec`` exists and is marked as deterministic,
+    the two seed-123 observations must also be equivalent. Finally, a warning is
+    logged if the ``seed`` parameter of ``reset`` has a default other than ``None``.
+
+    Args:
+        env: The environment to check
+
+    Raises:
+        AssertionError: One of the checks above failed, or the environment cannot
+            be reset with a random seed, even though `seed` or `kwargs` appear in
+            the signature.
+        gymnasium.error.Error: The ``reset`` signature has neither a ``seed``
+            parameter nor ``**kwargs``.
+    """
+    signature = inspect.signature(env.reset)
+    # Only call `reset(seed=...)` if the signature can accept it, either through an
+    # explicit `seed` parameter or through a `**kwargs` catch-all.
+    if "seed" in signature.parameters or (
+        "kwargs" in signature.parameters
+        and signature.parameters["kwargs"].kind is inspect.Parameter.VAR_KEYWORD
+    ):
+        try:
+            # First reset with seed 123: snapshot the resulting RNG for later comparison.
+            obs_1, info = env.reset(seed=123)
+            assert (
+                obs_1 in env.observation_space
+            ), "The observation returned by `env.reset(seed=123)` is not within the observation space."
+            assert (
+                env.unwrapped._np_random  # pyright: ignore [reportPrivateUsage]
+                is not None
+            ), "Expects the random number generator to have been generated given a seed was passed to reset. Mostly likely the environment reset function does not call `super().reset(seed=seed)`."
+            # Deep copy so the snapshot is unaffected by the subsequent resets.
+            seed_123_rng = deepcopy(
+                env.unwrapped._np_random  # pyright: ignore [reportPrivateUsage]
+            )
+
+            # Second reset with the same seed: observation and RNG state should repeat.
+            obs_2, info = env.reset(seed=123)
+            assert (
+                obs_2 in env.observation_space
+            ), "The observation returned by `env.reset(seed=123)` is not within the observation space."
+            # The observation comparison is skipped for environments without a spec
+            # or whose spec declares them nondeterministic.
+            if env.spec is not None and env.spec.nondeterministic is False:
+                assert data_equivalence(
+                    obs_1, obs_2
+                ), "Using `env.reset(seed=123)` is non-deterministic as the observations are not equivalent."
+            assert (
+                env.unwrapped._np_random.bit_generator.state  # pyright: ignore [reportPrivateUsage]
+                == seed_123_rng.bit_generator.state
+            ), "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random generates are not same when the same seeds are passed to `env.reset`."
+
+            # Reset with a different seed: the RNG state must now differ from the snapshot.
+            obs_3, info = env.reset(seed=456)
+            assert (
+                obs_3 in env.observation_space
+            ), "The observation returned by `env.reset(seed=456)` is not within the observation space."
+            assert (
+                env.unwrapped._np_random.bit_generator.state  # pyright: ignore [reportPrivateUsage]
+                != seed_123_rng.bit_generator.state
+            ), "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random number generators are not different when different seeds are passed to `env.reset`."
+
+        except TypeError as e:
+            # A TypeError here means `reset` rejected the `seed` keyword despite its signature.
+            raise AssertionError(
+                "The environment cannot be reset with a random seed, even though `seed` or `kwargs` appear in the signature. "
+                f"This should never happen, please report this issue. The error was: {e}"
+            ) from e
+
+        # `seed_param` is None when seeding is only accepted through `**kwargs`.
+        seed_param = signature.parameters.get("seed")
+        # Check the default value is None
+        if seed_param is not None and seed_param.default is not None:
+            logger.warn(
+                "The default seed argument in reset should be `None`, otherwise the environment will by default always be deterministic. "
+                f"Actual default: {seed_param.default}"
+            )
+    else:
+        raise gym.error.Error(
+            "The `reset` method does not provide a `seed` or `**kwargs` keyword argument."
+        )
+
+
+def check_reset_options(env: gym.Env):
+    """Verify that ``env.reset`` accepts an ``options`` keyword argument.
+
+    Args:
+        env: The environment to check
+
+    Raises:
+        AssertionError: ``env.reset(options={})`` raised a ``TypeError`` even
+            though `options` or `kwargs` appear in the signature.
+        gymnasium.error.Error: The ``reset`` signature has neither an ``options``
+            parameter nor ``**kwargs``.
+    """
+    reset_params = inspect.signature(env.reset).parameters
+    accepts_options = "options" in reset_params
+    accepts_var_kwargs = (
+        "kwargs" in reset_params
+        and reset_params["kwargs"].kind is inspect.Parameter.VAR_KEYWORD
+    )
+
+    # Nothing to call if the signature cannot take `options` at all.
+    if not (accepts_options or accepts_var_kwargs):
+        raise gym.error.Error(
+            "The `reset` method does not provide an `options` or `**kwargs` keyword argument."
+        )
+
+    try:
+        env.reset(options={})
+    except TypeError as e:
+        raise AssertionError(
+            "The environment cannot be reset with options, even though `options` or `**kwargs` appear in the signature. "
+            f"This should never happen, please report this issue. The error was: {e}"
+        ) from e
+
+
+def check_reset_return_info_deprecation(env: gym.Env):
+    """Warn if ``env.reset`` still declares the deprecated ``return_info`` parameter.
+
+    Only the signature of ``env.reset`` is inspected; the method is not called.
+
+    Args:
+        env: The environment to check
+    Raises:
+        UserWarning
+    """
+    signature = inspect.signature(env.reset)
+    if "return_info" in signature.parameters:
+        # Each fragment ends with a space: adjacent string literals are joined
+        # verbatim, so without it the words run together in the emitted warning.
+        logger.warn(
+            "`return_info` is deprecated as an optional argument to `reset`. `reset` "
+            "should now always return `obs, info` where `obs` is an observation, and `info` is a dictionary "
+            "containing additional information."
+        )
+
+
+def check_seed_deprecation(env: gym.Env):
+    """Warn if the environment still exposes a callable ``seed`` attribute.
+
+    Args:
+        env: The environment to check
+    Raises:
+        UserWarning
+    """
+    # A missing or non-callable `seed` attribute means there is nothing to report.
+    if not callable(getattr(env, "seed", None)):
+        return
+
+    logger.warn(
+        "Official support for the `seed` function is dropped. "
+        "Standard practice is to reset gymnasium environments using `env.reset(seed=<desired seed>)`"
+    )
+
+
+def check_reset_return_type(env: gym.Env):
+    """Check that :meth:`reset` returns a 2-tuple ``(obs, info)``.
+
+    Calls ``env.reset()`` once and asserts that the result is a tuple of length
+    two, that its first element is contained in ``env.observation_space`` and
+    that its second element is a ``dict``.
+
+    Args:
+        env: The environment to check
+    Raises:
+        AssertionError depending on spec violation
+    """
+    reset_output = env.reset()
+
+    # Shape of the return value.
+    assert isinstance(
+        reset_output, tuple
+    ), f"The result returned by `env.reset()` was not a tuple of the form `(obs, info)`, where `obs` is a observation and `info` is a dictionary containing additional information. Actual type: `{type(reset_output)}`"
+    assert (
+        len(reset_output) == 2
+    ), f"Calling the reset method did not return a 2-tuple, actual length: {len(reset_output)}"
+
+    # Contents of the return value.
+    observation, reset_info = reset_output
+    assert (
+        observation in env.observation_space
+    ), "The first element returned by `env.reset()` is not within the observation space."
+    assert isinstance(
+        reset_info, dict
+    ), f"The second element returned by `env.reset()` was not a dictionary, actual type: {type(reset_info)}"
+
+
+def check_space_limit(space, space_type: str):
+    """Warn about unbounded or un-normalized `Box` spaces; runs only as part of `check_env`.
+
+    For a `Box`, a warning is logged if any lower bound is ``-inf`` or any upper
+    bound is ``+inf``. For one-dimensional `Box` action spaces a further warning
+    is logged when the bounds are not symmetric/normalized. `Tuple` and `Dict`
+    spaces are checked recursively; every other space type is ignored.
+
+    Args:
+        space: The space to check
+        space_type: Name used in the warning text; the normalization check only
+            runs when this equals ``"action"``
+    """
+    if isinstance(space, spaces.Box):
+        if np.any(np.equal(space.low, -np.inf)):
+            logger.warn(
+                f"A Box {space_type} space minimum value is -infinity. This is probably too low."
+            )
+        if np.any(np.equal(space.high, np.inf)):
+            # The check above is for +inf, so the message must say "infinity".
+            logger.warn(
+                f"A Box {space_type} space maximum value is infinity. This is probably too high."
+            )
+
+        # Check that the Box space is normalized (vector action boxes only)
+        if space_type == "action" and len(space.shape) == 1:
+            if (
+                # A non-zero lower bound whose magnitude differs from the upper bound
+                np.any(
+                    np.logical_and(
+                        space.low != np.zeros_like(space.low),
+                        np.abs(space.low) != np.abs(space.high),
+                    )
+                )
+                or np.any(space.low < -1)
+                or np.any(space.high > 1)
+            ):
+                # todo - Add to gymlibrary.ml?
+                logger.warn(
+                    "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). "
+                    "See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information."
+                )
+    elif isinstance(space, spaces.Tuple):
+        for subspace in space.spaces:
+            check_space_limit(subspace, space_type)
+    elif isinstance(space, spaces.Dict):
+        for subspace in space.values():
+            check_space_limit(subspace, space_type)
+
+
+def check_env(env: gym.Env, warn: bool = None, skip_render_check: bool = False):
+    """Check that an environment follows Gym API.
+
+    This is an invasive function that calls the environment's reset and step.
+
+    This is particularly useful when using a custom environment.
+    Please take a look at https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/
+    for more information about the API.
+
+    Args:
+        env: The Gym environment that will be checked
+        warn: Ignored; passing any value other than ``None`` only logs a warning
+        skip_render_check: Whether to skip the checks for the render method. False by default,
+            i.e. the render checks run unless this is set to True (useful for the CI)
+    """
+    if warn is not None:
+        logger.warn("`check_env(warn=...)` parameter is now ignored.")
+
+    assert isinstance(
+        env, gym.Env
+    ), "The environment must inherit from the gymnasium.Env class. See https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/ for more info."
+
+    # A wrapped environment is still checked, but the user is told the results may be affected.
+    if env.unwrapped is not env:
+        logger.warn(
+            f"The environment ({env}) is different from the unwrapped version ({env.unwrapped}). This could effect the environment checker as the environment most likely has a wrapper applied to it. We recommend using the raw environment for `check_env` using `env.unwrapped`."
+        )
+
+    # ============= Check the spaces (observation and action) ================
+    assert hasattr(
+        env, "action_space"
+    ), "The environment must specify an action space. See https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/ for more info."
+    check_action_space(env.action_space)
+    check_space_limit(env.action_space, "action")
+
+    assert hasattr(
+        env, "observation_space"
+    ), "The environment must specify an observation space. See https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/ for more info."
+    check_observation_space(env.observation_space)
+    check_space_limit(env.observation_space, "observation")
+
+    # ==== Check the reset method ====
+    check_seed_deprecation(env)
+    check_reset_return_info_deprecation(env)
+    check_reset_return_type(env)
+    check_reset_seed(env)
+    check_reset_options(env)
+
+    # ============ Check the returned values ===============
+    env_reset_passive_checker(env)
+    env_step_passive_checker(env, env.action_space.sample())
+
+    # ==== Check the render method and the declared render modes ====
+    if not skip_render_check:
+        if env.render_mode is not None:
+            env_render_passive_checker(env)
+
+        # Every declared render mode is exercised on a fresh environment built from the spec.
+        if env.spec is not None:
+            for render_mode in env.metadata["render_modes"]:
+                new_env = env.spec.make(render_mode=render_mode)
+                new_env.reset()
+                env_render_passive_checker(new_env)
+                new_env.close()
+        else:
+            logger.warn(
+                "Not able to test alternative render modes due to the environment not having a spec. Try instantialising the environment through gymnasium.make"
+            )
+
+    # Closing an already-closed environment should be harmless: the first `close()` is
+    # unguarded, only the second (repeated) call is wrapped so a failure becomes a warning.
+    if env.spec is not None:
+        new_env = env.spec.make()
+        new_env.close()
+        try:
+            new_env.close()
+        except Exception as e:
+            logger.warn(
+                f"Calling `env.close()` on the closed environment should be allowed, but it raised an exception: {e}"
+            )
--- a/rl/Lib/site-packages/gymnasium/utils/env_match.py
+++ b/rl/Lib/site-packages/gymnasium/utils/env_match.py
@ -0,0 +1,121 @@
+"""A set of tests to help the designer of gymnasium environments verify that they work correctly."""
+
+import gymnasium as gym
+from gymnasium.utils.env_checker import data_equivalence
+
+
+def _assert_info_match(info_a: dict, info_b: dict, info_comparison: str, stage: str):
+    """Assert that ``info_b`` matches ``info_a`` under the given ``info_comparison`` mode.
+
+    Args:
+        info_a: Info dictionary returned by the first environment.
+        info_b: Info dictionary returned by the second environment.
+        info_comparison: One of the modes accepted by `check_environments_match`.
+        stage: Prefix of the assertion message, ``"resetting"`` or ``"stepping"``.
+    """
+    if info_comparison == "equivalence":
+        assert data_equivalence(info_a, info_b), f"{stage} info is not equivalent"
+    elif info_comparison == "superset":
+        for key in info_a:
+            assert data_equivalence(
+                info_a[key], info_b[key]
+            ), f"{stage} info is not a superset"
+    elif info_comparison == "keys-equivalence":
+        assert info_a.keys() == info_b.keys(), f"{stage} info keys are not equivalent"
+    elif info_comparison == "keys-superset":
+        assert info_b.keys() >= info_a.keys(), f"{stage} info keys are not a superset"
+    # "skip": no check is made on the infos.
+
+
+def check_environments_match(
+    env_a: gym.Env,
+    env_b: gym.Env,
+    num_steps: int,
+    seed: int = 0,
+    skip_obs: bool = False,
+    skip_rew: bool = False,
+    skip_terminal: bool = False,
+    skip_truncated: bool = False,
+    info_comparison: str = "equivalence",
+):
+    """Checks if the environments `env_a` & `env_b` are identical.
+
+    Both environments are reset with the same seed and then stepped with the same
+    actions, sampled from `env_a`'s (seeded) action space. Whenever either
+    environment terminates or truncates, both are reset again with `seed`.
+
+    Args:
+        env_a: First environment to check.
+        env_b: Second environment to check.
+        num_steps: number of timesteps to test for, setting to 0 tests only resetting.
+        seed: used to seed the resets & the action sampling.
+        skip_obs: If `True` it does not check for equivalence of the observation
+            (nor of the observation spaces).
+        skip_rew: If `True` it does not check for equivalence of the reward.
+        skip_terminal: If `True` it does not check for equivalence of the `terminated` flag.
+        skip_truncated: If `True` it does not check for equivalence of the `truncated` flag.
+        info_comparison: The options are
+            If "equivalence" then checks if the `info`s are identical,
+            If "superset" checks if `info_b` is a (non-strict) superset of `info_a`
+            If "keys-equivalence" checks if the `info`s keys are identical (while ignoring the values).
+            If "keys-superset" checks if the `info_b`s keys are a superset of `info_a`'s keys.
+            If "skip" no checks are made at the `info`.
+
+    Raises:
+        AssertionError: If `info_comparison` is not one of the options above, or
+            any of the enabled comparisons fails.
+    """
+    assert info_comparison in [
+        "equivalence",
+        "superset",
+        "skip",
+        "keys-equivalence",
+        "keys-superset",
+    ]
+
+    assert env_a.action_space == env_b.action_space
+    # Compare env_a against env_b (comparing env_b with itself is always true).
+    assert skip_obs or env_a.observation_space == env_b.observation_space
+
+    # Actions are sampled from env_a only, so seeding its action space makes the run reproducible.
+    env_a.action_space.seed(seed)
+    obs_a, info_a = env_a.reset(seed=seed)
+    obs_b, info_b = env_b.reset(seed=seed)
+
+    assert skip_obs or data_equivalence(
+        obs_a, obs_b
+    ), "resetting observation is not equivalent"
+    _assert_info_match(info_a, info_b, info_comparison, "resetting")
+
+    for _ in range(num_steps):
+        action = env_a.action_space.sample()
+        obs_a, rew_a, terminal_a, truncated_a, info_a = env_a.step(action)
+        obs_b, rew_b, terminal_b, truncated_b, info_b = env_b.step(action)
+        assert skip_obs or data_equivalence(
+            obs_a, obs_b
+        ), "stepping observation is not equivalent"
+        assert skip_rew or data_equivalence(
+            rew_a, rew_b
+        ), "stepping reward is not equivalent"
+        assert (
+            skip_terminal or terminal_a == terminal_b
+        ), "stepping terminal is not equivalent"
+        assert (
+            skip_truncated or truncated_a == truncated_b
+        ), "stepping truncated is not equivalent"
+        _assert_info_match(info_a, info_b, info_comparison, "stepping")
+
+        # Restart both environments as soon as either episode has ended.
+        if terminal_a or truncated_a or terminal_b or truncated_b:
+            obs_a, info_a = env_a.reset(seed=seed)
+            obs_b, info_b = env_b.reset(seed=seed)
+            assert skip_obs or data_equivalence(
+                obs_a, obs_b
+            ), "resetting observation is not equivalent"
+            _assert_info_match(info_a, info_b, info_comparison, "resetting")
--- a/rl/Lib/site-packages/gymnasium/utils/ezpickle.py
+++ b/rl/Lib/site-packages/gymnasium/utils/ezpickle.py
@ -0,0 +1,36 @@
+"""Class for pickling and unpickling objects via their constructor arguments."""
+from typing import Any
+
+
+class EzPickle:
+    """Mixin that pickles an object as its constructor arguments and rebuilds it from them.
+
+    Example:
+        >>> class Animal: pass
+        >>> class Dog(Animal, EzPickle):
+        ...    def __init__(self, furcolor, tailkind="bushy"):
+        ...        Animal.__init__(self)
+        ...        EzPickle.__init__(self, furcolor, tailkind)
+
+    When this object is unpickled, a new ``Dog`` will be constructed by passing the provided furcolor and tailkind into the constructor.
+    However, philosophers are still not sure whether it is still the same dog.
+
+    This is generally needed only for environments which wrap C/C++ code, such as MuJoCo and Atari.
+    """
+
+    def __init__(self, *args: Any, **kwargs: Any):
+        """Records the constructor's ``args`` and ``kwargs`` so they can be pickled later."""
+        self._ezpickle_args, self._ezpickle_kwargs = args, kwargs
+
+    def __getstate__(self):
+        """Returns the pickle state: only the recorded constructor args and kwargs."""
+        return dict(
+            _ezpickle_args=self._ezpickle_args,
+            _ezpickle_kwargs=self._ezpickle_kwargs,
+        )
+
+    def __setstate__(self, d):
+        """Restores the object by re-running the constructor with the arguments stored in d."""
+        # Build a fresh instance of the concrete subclass, then adopt its attributes.
+        rebuilt = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"])
+        self.__dict__.update(rebuilt.__dict__)
--- a/rl/Lib/site-packages/gymnasium/utils/passive_env_checker.py
+++ b/rl/Lib/site-packages/gymnasium/utils/passive_env_checker.py
@ -0,0 +1,366 @@
+"""A set of functions for passively checking environment implementations."""
+import inspect
+from functools import partial
+from typing import Callable
+
+import numpy as np
+
+from gymnasium import Space, error, logger, spaces
+
+
+__all__ = [
+    "env_render_passive_checker",
+    "env_reset_passive_checker",
+    "env_step_passive_checker",
+]
+
+
+def _check_box_observation_space(observation_space: spaces.Box):
+    """Checks that a :class:`Box` observation space is defined in a sensible way.
+
+    Asserts that ``low`` and ``high`` have the same shape as the space, then warns about coordinates
+    whose bounds are equal and about coordinates whose ``high`` is below ``low``.
+
+    Args:
+        observation_space: A box observation space
+
+    Raises:
+        AssertionError: If the shape of ``low`` or ``high`` differs from the shape of the space.
+    """
+    assert (
+        observation_space.low.shape == observation_space.shape
+    ), f"The Box observation space shape and low shape have different shapes, low shape: {observation_space.low.shape}, box shape: {observation_space.shape}"
+    assert (
+        observation_space.high.shape == observation_space.shape
+    ), f"The Box observation space shape and high shape have different shapes, high shape: {observation_space.high.shape}, box shape: {observation_space.shape}"
+
+    equal_bounds = observation_space.low == observation_space.high
+    if np.any(equal_bounds):
+        logger.warn(
+            "A Box observation space maximum and minimum values are equal. "
+            f"Actual equal coordinates: {list(zip(*np.where(equal_bounds)))}"
+        )
+
+    # Checked independently of the equality test so that inverted bounds are still
+    # reported when other coordinates of the same space merely have equal bounds.
+    inverted_bounds = observation_space.high < observation_space.low
+    if np.any(inverted_bounds):
+        logger.warn(
+            "A Box observation space low value is greater than a high value. "
+            f"Actual less than coordinates: {list(zip(*np.where(inverted_bounds)))}"
+        )
+
+
+def _check_box_action_space(action_space: spaces.Box):
+    """Checks that a :class:`Box` action space is defined in a sensible way.
+
+    Asserts that ``low`` and ``high`` have the same shape as the space and warns about
+    coordinates whose lower and upper bounds are equal.
+
+    Args:
+        action_space: A box action space
+
+    Raises:
+        AssertionError: If the shape of ``low`` or ``high`` differs from the shape of the space.
+    """
+    assert (
+        action_space.low.shape == action_space.shape
+    ), f"The Box action space shape and low shape have different shapes, low shape: {action_space.low.shape}, box shape: {action_space.shape}"
+    assert (
+        action_space.high.shape == action_space.shape
+    ), f"The Box action space shape and high shape have different shapes, high shape: {action_space.high.shape}, box shape: {action_space.shape}"
+
+    # Compute the mask once and reuse it for both the test and the message.
+    equal_bounds = action_space.low == action_space.high
+    if np.any(equal_bounds):
+        logger.warn(
+            "A Box action space maximum and minimum values are equal. "
+            f"Actual equal coordinates: {list(zip(*np.where(equal_bounds)))}"
+        )
+
+
+def check_space(
+    space: Space, space_type: str, check_box_space_fn: Callable[[spaces.Box], None]
+):
+    """A passive check of an environment space that should not affect the environment.
+
+    ``Tuple`` and ``Dict`` spaces are checked recursively, one subspace at a time.
+
+    Args:
+        space: The space to check
+        space_type: Name of the kind of space (e.g. "observation" or "action"), used in the messages
+        check_box_space_fn: Function applied to every :class:`Box` space that is encountered
+    """
+    if not isinstance(space, spaces.Space):
+        raise AssertionError(
+            f"{space_type} space does not inherit from `gymnasium.spaces.Space`, actual type: {type(space)}"
+        )
+
+    if isinstance(space, spaces.Box):
+        check_box_space_fn(space)
+        return
+
+    if isinstance(space, spaces.Discrete):
+        assert (
+            space.n > 0
+        ), f"Discrete {space_type} space's number of elements must be positive, actual number of elements: {space.n}"
+        assert (
+            space.shape == ()
+        ), f"Discrete {space_type} space's shape should be empty, actual shape: {space.shape}"
+        return
+
+    if isinstance(space, spaces.MultiDiscrete):
+        assert (
+            space.shape == space.nvec.shape
+        ), f"Multi-discrete {space_type} space's shape must be equal to the nvec shape, space shape: {space.shape}, nvec shape: {space.nvec.shape}"
+        assert np.all(
+            space.nvec > 0
+        ), f"Multi-discrete {space_type} space's all nvec elements must be greater than 0, actual nvec: {space.nvec}"
+        return
+
+    if isinstance(space, spaces.MultiBinary):
+        assert np.all(
+            np.asarray(space.shape) > 0
+        ), f"Multi-binary {space_type} space's all shape elements must be greater than 0, actual shape: {space.shape}"
+        return
+
+    if isinstance(space, spaces.Tuple):
+        assert (
+            len(space.spaces) > 0
+        ), f"An empty Tuple {space_type} space is not allowed."
+        for member in space.spaces:
+            check_space(member, space_type, check_box_space_fn)
+        return
+
+    if isinstance(space, spaces.Dict):
+        assert (
+            len(space.spaces.keys()) > 0
+        ), f"An empty Dict {space_type} space is not allowed."
+        for member in space.values():
+            check_space(member, space_type, check_box_space_fn)
+
+
+# `check_space` specialised for observation spaces: messages say "observation" and
+# Box spaces are validated with `_check_box_observation_space`.
+check_observation_space = partial(
+    check_space,
+    space_type="observation",
+    check_box_space_fn=_check_box_observation_space,
+)
+# `check_space` specialised for action spaces: messages say "action" and
+# Box spaces are validated with `_check_box_action_space`.
+check_action_space = partial(
+    check_space, space_type="action", check_box_space_fn=_check_box_action_space
+)
+
+
+def check_obs(obs, observation_space: spaces.Space, method_name: str):
+    """Passively checks that an observation is consistent with the declared observation space.
+
+    Type mismatches are reported as warnings; a wrong ``Tuple`` length, a non-dict observation for a
+    ``Dict`` space or mismatching ``Dict`` keys fail an assertion. ``Tuple`` and ``Dict`` observations
+    are checked recursively, and finally the observation is tested for membership in the space.
+
+    Args:
+        obs: The observation to check
+        observation_space: The observation space of the observation
+        method_name: The method name that generated the observation
+    """
+    prefix = f"The obs returned by the `{method_name}()` method"
+
+    if isinstance(observation_space, spaces.Discrete):
+        if not isinstance(obs, (np.int64, int)):
+            logger.warn(
+                f"{prefix} should be an int or np.int64, actual type: {type(obs)}"
+            )
+    elif isinstance(observation_space, spaces.Box):
+        # Boxes with an empty shape are not type-checked here.
+        if observation_space.shape != ():
+            if not isinstance(obs, np.ndarray):
+                logger.warn(
+                    f"{prefix} was expecting a numpy array, actual type: {type(obs)}"
+                )
+            elif obs.dtype != observation_space.dtype:
+                logger.warn(
+                    f"{prefix} was expecting numpy array dtype to be {observation_space.dtype}, actual type: {obs.dtype}"
+                )
+    elif isinstance(observation_space, (spaces.MultiBinary, spaces.MultiDiscrete)):
+        if not isinstance(obs, np.ndarray):
+            logger.warn(
+                f"{prefix} was expecting a numpy array, actual type: {type(obs)}"
+            )
+    elif isinstance(observation_space, spaces.Tuple):
+        if not isinstance(obs, tuple):
+            logger.warn(f"{prefix} was expecting a tuple, actual type: {type(obs)}")
+        member_spaces = observation_space.spaces
+        assert len(obs) == len(
+            member_spaces
+        ), f"{prefix} length is not same as the observation space length, obs length: {len(obs)}, space length: {len(member_spaces)}"
+        for member_obs, member_space in zip(obs, member_spaces):
+            check_obs(member_obs, member_space, method_name)
+    elif isinstance(observation_space, spaces.Dict):
+        assert isinstance(
+            obs, dict
+        ), f"{prefix} must be a dict, actual type: {type(obs)}"
+        assert (
+            obs.keys() == observation_space.spaces.keys()
+        ), f"{prefix} observation keys is not same as the observation space keys, obs keys: {list(obs.keys())}, space keys: {list(observation_space.spaces.keys())}"
+        for key in observation_space.spaces:
+            check_obs(obs[key], observation_space[key], method_name)
+
+    # The membership test itself may raise for malformed observations; report that as a warning too.
+    try:
+        if obs not in observation_space:
+            logger.warn(f"{prefix} is not within the observation space.")
+    except Exception as e:
+        logger.warn(
+            f"{prefix} is not within the observation space with exception: {e}"
+        )
+
+
+def env_reset_passive_checker(env, **kwargs):
+    """Passively checks ``Env.reset``: inspects its signature, calls it with ``kwargs`` and checks what it returns.
+
+    Args:
+        env: The environment whose ``reset`` method is checked
+        **kwargs: Keyword arguments forwarded to ``env.reset``
+
+    Returns:
+        The unmodified result of ``env.reset(**kwargs)``
+    """
+    reset_params = inspect.signature(env.reset).parameters
+    accepts_kwargs = "kwargs" in reset_params
+
+    if "seed" in reset_params or accepts_kwargs:
+        seed_param = reset_params.get("seed")
+        # Check the default value is None
+        if seed_param is not None and seed_param.default is not None:
+            logger.warn(
+                "The default seed argument in `Env.reset` should be `None`, otherwise the environment will by default always be deterministic. "
+                f"Actual default: {seed_param}"
+            )
+    else:
+        logger.deprecation(
+            "Current gymnasium version requires that `Env.reset` can be passed a `seed` instead of using `Env.seed` for resetting the environment random number generator."
+        )
+
+    if not ("options" in reset_params or accepts_kwargs):
+        logger.deprecation(
+            "Current gymnasium version requires that `Env.reset` can be passed `options` to allow the environment initialisation to be passed additional information."
+        )
+
+    # Checks the result of env.reset with kwargs
+    result = env.reset(**kwargs)
+
+    if isinstance(result, tuple) and len(result) == 2:
+        obs, info = result
+        check_obs(obs, env.observation_space, "reset")
+        assert isinstance(
+            info, dict
+        ), f"The second element returned by `env.reset()` was not a dictionary, actual type: {type(info)}"
+    elif not isinstance(result, tuple):
+        logger.warn(
+            f"The result returned by `env.reset()` was not a tuple of the form `(obs, info)`, where `obs` is a observation and `info` is a dictionary containing additional information. Actual type: `{type(result)}`"
+        )
+    else:
+        logger.warn(
+            "The result returned by `env.reset()` should be `(obs, info)` by default, , where `obs` is a observation and `info` is a dictionary containing additional information."
+        )
+    return result
+
+
+def env_step_passive_checker(env, action):
+    """Passively checks ``Env.step``: calls it with ``action`` and checks the pieces of the returned tuple.
+
+    Args:
+        env: The environment whose ``step`` method is checked
+        action: The action passed to ``env.step`` (it is not validated here)
+
+    Returns:
+        The unmodified result of ``env.step(action)``
+    """
+    # We don't check the action as for some environments then out-of-bounds values can be given
+    result = env.step(action)
+    assert isinstance(
+        result, tuple
+    ), f"Expects step result to be a tuple, actual type: {type(result)}"
+
+    # np.bool is actual python bool not np boolean type, therefore bool_ or bool8
+    boolean_types = (bool, np.bool_)
+    n_elements = len(result)
+
+    if n_elements == 4:
+        logger.deprecation(
+            "Core environment is written in old step API which returns one bool instead of two. "
+            "It is recommended to rewrite the environment with new step API. "
+        )
+        obs, reward, done, info = result
+
+        if not isinstance(done, boolean_types):
+            logger.warn(
+                f"Expects `done` signal to be a boolean, actual type: {type(done)}"
+            )
+    elif n_elements == 5:
+        obs, reward, terminated, truncated, info = result
+
+        for signal_name, signal in (
+            ("terminated", terminated),
+            ("truncated", truncated),
+        ):
+            if not isinstance(signal, boolean_types):
+                logger.warn(
+                    f"Expects `{signal_name}` signal to be a boolean, actual type: {type(signal)}"
+                )
+    else:
+        raise error.Error(
+            f"Expected `Env.step` to return a four or five element tuple, actual number of elements returned: {len(result)}."
+        )
+
+    check_obs(obs, env.observation_space, "step")
+
+    reward_type = type(reward)
+    reward_is_number = np.issubdtype(reward_type, np.integer) or np.issubdtype(
+        reward_type, np.floating
+    )
+    if reward_is_number:
+        if np.isnan(reward):
+            logger.warn("The reward is a NaN value.")
+        if np.isinf(reward):
+            logger.warn("The reward is an inf value.")
+    else:
+        logger.warn(
+            f"The reward returned by `step()` must be a float, int, np.integer or np.floating, actual type: {type(reward)}"
+        )
+
+    assert isinstance(
+        info, dict
+    ), f"The `info` returned by `step()` must be a python dictionary, actual type: {type(info)}"
+
+    return result
+
+
+def _check_render_return(render_mode, render_return):
+    """Warns when the value returned by ``render()`` does not fit the given ``render_mode``.
+
+    Args:
+        render_mode: The render mode the return value is checked against
+        render_return: The value returned by ``render()``
+    """
+    if render_mode == "human":
+        if render_return is not None:
+            logger.warn(
+                f"Human rendering should return `None`, got {type(render_return)}"
+            )
+        return
+
+    if render_mode == "rgb_array":
+        if not isinstance(render_return, np.ndarray):
+            logger.warn(
+                f"RGB-array rendering should return a numpy array, got {type(render_return)}"
+            )
+            return
+        if render_return.dtype != np.uint8:
+            logger.warn(
+                f"RGB-array rendering should return a numpy array with dtype uint8, got {render_return.dtype}"
+            )
+        if render_return.ndim != 3:
+            logger.warn(
+                f"RGB-array rendering should return a numpy array with three axes, got {render_return.ndim}"
+            )
+        elif render_return.shape[2] != 3:
+            logger.warn(
+                f"RGB-array rendering should return a numpy array in which the last axis has three dimensions, got {render_return.shape[2]}"
+            )
+        return
+
+    if render_mode == "depth_array":
+        if not isinstance(render_return, np.ndarray):
+            logger.warn(
+                f"Depth-array rendering should return a numpy array, got {type(render_return)}"
+            )
+        elif render_return.ndim != 2:
+            logger.warn(
+                f"Depth-array rendering should return a numpy array with two axes, got {render_return.ndim}"
+            )
+        return
+
+    if render_mode in ("ansi", "ascii"):
+        if not isinstance(render_return, str):
+            logger.warn(
+                f"ANSI/ASCII rendering should produce a string, got {type(render_return)}"
+            )
+        return
+
+    if render_mode.endswith("_list"):
+        if not isinstance(render_return, list):
+            logger.warn(
+                f"Render mode `{render_mode}` should produce a list, got {type(render_return)}"
+            )
+            return
+        # Every item of the list has to match the render mode without the `_list` suffix
+        item_mode = render_mode[: -len("_list")]
+        for frame in render_return:
+            _check_render_return(item_mode, frame)
+
+
+def env_render_passive_checker(env):
+    """Passively checks the render metadata of ``env``, then calls ``env.render()`` and checks what it returns.
+
+    Args:
+        env: The environment whose ``metadata``, ``render_mode`` and ``render()`` are inspected
+
+    Returns:
+        The unmodified result of ``env.render()``
+    """
+    declared_modes = env.metadata.get("render_modes")
+    if declared_modes is None:
+        logger.warn(
+            "No render modes was declared in the environment (env.metadata['render_modes'] is None or not defined), you may have trouble when calling `.render()`."
+        )
+    else:
+        if not isinstance(declared_modes, (list, tuple)):
+            logger.warn(
+                f"Expects the render_modes to be a sequence (i.e. list, tuple), actual type: {type(declared_modes)}"
+            )
+        elif not all(isinstance(mode, str) for mode in declared_modes):
+            logger.warn(
+                f"Expects all render modes to be strings, actual types: {[type(mode) for mode in declared_modes]}"
+            )
+
+        declared_fps = env.metadata.get("render_fps")
+        if len(declared_modes) == 0:
+            # env.render_mode is an attribute with default None
+            assert (
+                env.render_mode is None
+            ), f"With no render_modes, expects the Env.render_mode to be None, actual value: {env.render_mode}"
+        else:
+            # `render_fps` is only required if rendering is actually implemented
+            if declared_fps is None:
+                logger.warn(
+                    "No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps."
+                )
+            elif not (
+                np.issubdtype(type(declared_fps), np.integer)
+                or np.issubdtype(type(declared_fps), np.floating)
+            ):
+                logger.warn(
+                    f"Expects the `env.metadata['render_fps']` to be an integer or a float, actual type: {type(declared_fps)}"
+                )
+            else:
+                assert (
+                    declared_fps > 0
+                ), f"Expects the `env.metadata['render_fps']` to be greater than zero, actual value: {declared_fps}"
+
+            assert env.render_mode is None or env.render_mode in declared_modes, (
+                "The environment was initialized successfully however with an unsupported render mode. "
+                f"Render mode: {env.render_mode}, modes: {declared_modes}"
+            )
+
+    result = env.render()
+    if env.render_mode is not None:
+        _check_render_return(env.render_mode, result)
+
+    return result
--- a/rl/Lib/site-packages/gymnasium/utils/performance.py
+++ b/rl/Lib/site-packages/gymnasium/utils/performance.py
@ -0,0 +1,101 @@
+"""A collection of performance benchmarks, useful for debugging performance related issues."""
+
+import time
+from typing import Callable
+
+import gymnasium
+
+
+def benchmark_step(env: gymnasium.Env, target_duration: int = 5, seed=None) -> float:
+    """A benchmark to measure the runtime performance of step for an environment.
+
+    example usage:
+        ```py
+        env_old = ...
+        old_throughput = benchmark_step(env_old)
+        env_new = ...
+        new_throughput = benchmark_step(env_new)
+        slowdown = old_throughput / new_throughput
+        ```
+
+    Args:
+        env: the environment to benchmark.
+        target_duration: the duration of the benchmark in seconds (note: it will go slightly over it).
+        seed: the seed passed to the first ``env.reset`` call (later resets are unseeded).
+
+    Returns: the average steps per second.
+    """
+    steps = 0
+    env.reset(seed=seed)
+    # One sample is drawn before the clock starts (presumably a warm-up of the action space).
+    env.action_space.sample()
+    # perf_counter is monotonic and high resolution, so the measurement is not
+    # affected by system clock adjustments or a coarse wall-clock tick.
+    start = time.perf_counter()
+
+    while True:
+        steps += 1
+        action = env.action_space.sample()
+        _, _, terminal, truncated, _ = env.step(action)
+
+        if terminal or truncated:
+            env.reset()
+
+        end = time.perf_counter()
+        if end - start > target_duration:
+            break
+
+    return steps / (end - start)
+
+
+def benchmark_init(
+    env_lambda: Callable[[], gymnasium.Env], target_duration: int = 5, seed=None
+) -> float:
+    """A benchmark to measure the initialization time and first reset.
+
+    Args:
+        env_lambda: the function to initialize the environment.
+        target_duration: the duration of the benchmark in seconds (note: it will go slightly over it).
+        seed: seeds the first reset of the environment.
+
+    Returns: the average number of initializations (each followed by one reset) per second.
+    """
+    inits = 0
+    # perf_counter is monotonic and high resolution, so the measurement is not
+    # affected by system clock adjustments or a coarse wall-clock tick.
+    start = time.perf_counter()
+    while True:
+        inits += 1
+        env = env_lambda()
+        env.reset(seed=seed)
+
+        end = time.perf_counter()
+        if end - start > target_duration:
+            break
+
+    return inits / (end - start)
+
+
+def benchmark_render(env: gymnasium.Env, target_duration: int = 5) -> float:
+    """A benchmark to measure the time of render().
+
+    Note: does not work with `render_mode='human'`
+
+    Args:
+        env: the environment to benchmark (Note: must be renderable).
+        target_duration: the duration of the benchmark in seconds (note: it will go slightly over it).
+
+    Returns: the average number of ``render()`` calls per second.
+    """
+    renders = 0
+    # perf_counter is monotonic and high resolution, so the measurement is not
+    # affected by system clock adjustments or a coarse wall-clock tick.
+    start = time.perf_counter()
+    while True:
+        renders += 1
+        env.render()
+
+        end = time.perf_counter()
+        if end - start > target_duration:
+            break
+
+    return renders / (end - start)
--- a/rl/Lib/site-packages/gymnasium/utils/play.py
+++ b/rl/Lib/site-packages/gymnasium/utils/play.py
@ -0,0 +1,403 @@
+"""Utilities for visualising an environment."""
+from collections import deque
+from typing import Callable, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+
+import gymnasium as gym
+from gymnasium import Env, logger
+from gymnasium.core import ActType, ObsType
+from gymnasium.error import DependencyNotInstalled
+from gymnasium.logger import deprecation
+
+
+try:
+    import pygame
+    from pygame import Surface
+    from pygame.event import Event
+except ImportError as e:
+    raise gym.error.DependencyNotInstalled(
+        "pygame is not installed, run `pip install gymnasium[classic-control]`"
+    ) from e
+
+try:
+    import matplotlib
+
+    matplotlib.use("TkAgg")
+    import matplotlib.pyplot as plt
+except ImportError:
+    logger.warn("matplotlib is not installed, run `pip install gymnasium[other]`")
+    matplotlib, plt = None, None
+
+
+class MissingKeysToAction(Exception):
+    """Raised when the environment does not have a default ``keys_to_action`` mapping.
+
+    In this module it is raised when no mapping is passed explicitly and neither the environment
+    nor its unwrapped version provides a ``get_keys_to_action`` method.
+    """
+
+
+class PlayableGame:
+    """Wraps an environment allowing keyboard inputs to interact with the environment."""
+
+    def __init__(
+        self,
+        env: Env,
+        keys_to_action: Optional[Dict[Tuple[int, ...], int]] = None,
+        zoom: Optional[float] = None,
+    ):
+        """Wraps an environment with a dictionary of keyboard buttons to action and if to zoom in on the environment.
+
+        Args:
+            env: The environment to play
+            keys_to_action: The dictionary of keyboard tuples and action value
+            zoom: If to zoom in on the environment render
+
+        Raises:
+            ValueError: If ``env.render_mode`` is neither ``"rgb_array"`` nor ``"rgb_array_list"``.
+        """
+        if env.render_mode not in {"rgb_array", "rgb_array_list"}:
+            raise ValueError(
+                "PlayableGame wrapper works only with rgb_array and rgb_array_list render modes, "
+                f"but your environment render_mode = {env.render_mode}."
+            )
+
+        self.env = env
+        self.relevant_keys = self._get_relevant_keys(keys_to_action)
+        # self.video_size is the size of the video that is being displayed.
+        # The window size may be larger, in that case we will add black bars
+        self.video_size = self._get_video_size(zoom)
+        self.screen = pygame.display.set_mode(self.video_size, pygame.RESIZABLE)
+        self.pressed_keys = []
+        self.running = True
+
+    def _get_relevant_keys(
+        self, keys_to_action: Optional[Dict[Tuple[int], int]] = None
+    ) -> set:
+        """Returns the set of all keys that appear in any key combination of ``keys_to_action``.
+
+        When ``keys_to_action`` is ``None`` the mapping is taken from ``get_keys_to_action()`` of the
+        environment or of its unwrapped version; :class:`MissingKeysToAction` is raised if neither exists.
+        """
+        if keys_to_action is None:
+            if hasattr(self.env, "get_keys_to_action"):
+                keys_to_action = self.env.get_keys_to_action()
+            elif hasattr(self.env.unwrapped, "get_keys_to_action"):
+                keys_to_action = self.env.unwrapped.get_keys_to_action()
+            else:
+                assert self.env.spec is not None
+                raise MissingKeysToAction(
+                    f"{self.env.spec.id} does not have explicit key to action mapping, "
+                    "please specify one manually"
+                )
+        assert isinstance(keys_to_action, dict)
+        # Flatten every key combination into one set (avoids quadratic list concatenation).
+        return {key for combination in keys_to_action for key in combination}
+
+    def _get_video_size(self, zoom: Optional[float] = None) -> Tuple[int, int]:
+        """Returns the ``(width, height)`` of the rendered frame, multiplied by ``zoom`` when it is given."""
+        rendered = self.env.render()
+        if isinstance(rendered, list):
+            # List render modes return several frames; the last one is used.
+            rendered = rendered[-1]
+        assert rendered is not None and isinstance(rendered, np.ndarray)
+        # The frame is indexed (row, column, ...), so the width is shape[1] and the height is shape[0].
+        video_size = (rendered.shape[1], rendered.shape[0])
+
+        if zoom is not None:
+            video_size = (int(video_size[0] * zoom), int(video_size[1] * zoom))
+
+        return video_size
+
+    def process_event(self, event: Event):
+        """Processes a PyGame event.
+
+        In particular, this function is used to keep track of which buttons are currently pressed
+        and to exit the :func:`play` function when the PyGame window is closed.
+
+        Args:
+            event: The event to process
+        """
+        if event.type == pygame.KEYDOWN:
+            if event.key in self.relevant_keys:
+                # Record each key once, even if a repeated KEYDOWN arrives for a held key.
+                if event.key not in self.pressed_keys:
+                    self.pressed_keys.append(event.key)
+            elif event.key == pygame.K_ESCAPE:
+                self.running = False
+        elif event.type == pygame.KEYUP:
+            # A KEYUP can arrive for a key whose KEYDOWN was never seen (e.g. it was already
+            # held when the window opened); removing it unconditionally would raise ValueError.
+            if event.key in self.relevant_keys and event.key in self.pressed_keys:
+                self.pressed_keys.remove(event.key)
+        elif event.type == pygame.QUIT:
+            self.running = False
+        elif event.type == pygame.WINDOWRESIZED:
+            # Compute the maximum video size that fits into the new window
+            scale_width = event.x / self.video_size[0]
+            scale_height = event.y / self.video_size[1]
+            scale = min(scale_height, scale_width)
+            self.video_size = (scale * self.video_size[0], scale * self.video_size[1])
+
+
+def display_arr(
+    screen: Surface, arr: np.ndarray, video_size: Tuple[int, int], transpose: bool
+):
+    """Displays a numpy array on screen.
+
+    The array values are rescaled linearly so that the minimum maps to 0 and the maximum to 255;
+    an array whose values are all equal is shown as black.
+
+    Args:
+        screen: The screen to show the array on
+        arr: The array to show
+        video_size: The video size of the screen
+        transpose: If to transpose the array on the screen
+    """
+    arr_min, arr_max = np.min(arr), np.max(arr)
+    value_range = arr_max - arr_min
+    if value_range > 0:
+        arr = 255.0 * (arr - arr_min) / value_range
+    else:
+        # A constant frame has no range to normalise over; dividing by zero would fill it with NaN.
+        arr = np.zeros_like(arr, dtype=np.float64)
+    pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr)
+    pyg_img = pygame.transform.scale(pyg_img, video_size)
+    # We might have to add black bars if surface_size is larger than video_size
+    surface_size = screen.get_size()
+    width_offset = (surface_size[0] - video_size[0]) / 2
+    height_offset = (surface_size[1] - video_size[1]) / 2
+    screen.fill((0, 0, 0))
+    screen.blit(pyg_img, (width_offset, height_offset))
+
+
+def play(
+    env: Env,
+    transpose: Optional[bool] = True,
+    fps: Optional[int] = None,
+    zoom: Optional[float] = None,
+    callback: Optional[Callable] = None,
+    keys_to_action: Optional[Dict[Union[Tuple[Union[str, int]], str], ActType]] = None,
+    seed: Optional[int] = None,
+    noop: ActType = 0,
+):
+    """Allows one to play the game using keyboard.
+
+    Args:
+        env: Environment to use for playing.
+        transpose: If this is ``True``, the output of observation is transposed. Defaults to ``True``.
+        fps: Maximum number of steps of the environment executed every second. If ``None`` (the default),
+            ``env.metadata["render_fps"]`` (or 30, if the environment does not specify "render_fps") is used.
+        zoom: Zoom the observation in, ``zoom`` amount, should be positive float
+        callback: If a callback is provided, it will be executed after every step. It takes the following input:
+                obs_t: observation before performing action
+                obs_tp1: observation after performing action
+                action: action that was executed
+                rew: reward that was received
+                terminated: whether the environment is terminated or not
+                truncated: whether the environment is truncated or not
+                info: debug info
+        keys_to_action:  Mapping from keys pressed to action performed.
+            Different formats are supported: Key combinations can either be expressed as a tuple of unicode code
+            points of the keys, as a tuple of characters, or as a string where each character of the string represents
+            one key.
+            For example if pressing 'w' and space at the same time is supposed
+            to trigger action number 2 then ``key_to_action`` dict could look like this:
+
+                >>> key_to_action = {
+                ...    # ...
+                ...    (ord('w'), ord(' ')): 2
+                ...    # ...
+                ... }
+
+            or like this:
+
+                >>> key_to_action = {
+                ...    # ...
+                ...    ("w", " "): 2
+                ...    # ...
+                ... }
+
+            or like this:
+
+                >>> key_to_action = {
+                ...    # ...
+                ...    "w ": 2
+                ...    # ...
+                ... }
+
+            If ``None``, default ``key_to_action`` mapping for that environment is used, if provided.
+        seed: Random seed used when resetting the environment. If None, no seed is used.
+        noop: The action used when no key input has been entered, or the entered key combination is unknown.
+
+    Raises:
+        MissingKeysToAction: If ``keys_to_action`` is ``None`` and neither the environment nor its
+            unwrapped version provides ``get_keys_to_action``.
+
+    Example:
+        >>> import gymnasium as gym
+        >>> from gymnasium.utils.play import play
+        >>> play(gym.make("CarRacing-v2", render_mode="rgb_array"), keys_to_action={  # doctest: +SKIP
+        ...                                                "w": np.array([0, 0.7, 0]),
+        ...                                                "a": np.array([-1, 0, 0]),
+        ...                                                "s": np.array([0, 0, 1]),
+        ...                                                "d": np.array([1, 0, 0]),
+        ...                                                "wa": np.array([-1, 0.7, 0]),
+        ...                                                "dw": np.array([1, 0.7, 0]),
+        ...                                                "ds": np.array([1, 0, 1]),
+        ...                                                "as": np.array([-1, 0, 1]),
+        ...                                               }, noop=np.array([0,0,0]))
+
+        Above code works also if the environment is wrapped, so it's particularly useful in
+        verifying that the frame-level preprocessing does not render the game
+        unplayable.
+
+        If you wish to plot real time statistics as you play, you can use
+        :class:`gym.utils.play.PlayPlot`. Here's a sample code for plotting the reward
+        for last 150 steps.
+
+        >>> import gymnasium as gym
+        >>> from gymnasium.utils.play import PlayPlot, play
+        >>> def callback(obs_t, obs_tp1, action, rew, terminated, truncated, info):
+        ...        return [rew,]
+        >>> plotter = PlayPlot(callback, 150, ["reward"])             # doctest: +SKIP
+        >>> play(gym.make("CartPole-v1"), callback=plotter.callback)  # doctest: +SKIP
+    """
+    # The environment has to be reset before `PlayableGame` renders it to find the video size.
+    env.reset(seed=seed)
+
+    if keys_to_action is None:
+        if hasattr(env, "get_keys_to_action"):
+            keys_to_action = env.get_keys_to_action()
+        elif hasattr(env.unwrapped, "get_keys_to_action"):
+            keys_to_action = env.unwrapped.get_keys_to_action()
+        else:
+            assert env.spec is not None
+            raise MissingKeysToAction(
+                f"{env.spec.id} does not have explicit key to action mapping, "
+                "please specify one manually"
+            )
+    assert keys_to_action is not None
+
+    # Normalise every key combination to a sorted tuple of key codes so that the lookup
+    # below does not depend on the order in which the keys were pressed.
+    key_code_to_action = {}
+    for key_combination, action in keys_to_action.items():
+        key_code = tuple(
+            sorted(ord(key) if isinstance(key, str) else key for key in key_combination)
+        )
+        key_code_to_action[key_code] = action
+
+    game = PlayableGame(env, key_code_to_action, zoom)
+
+    if fps is None:
+        fps = env.metadata.get("render_fps", 30)
+
+    done, obs = True, None
+    clock = pygame.time.Clock()
+
+    while game.running:
+        if done:
+            done = False
+            # `Env.reset` returns `(obs, info)`; keep only the observation so that the
+            # callback receives an observation, not the whole tuple, as `obs_t`.
+            obs, _ = env.reset(seed=seed)
+        else:
+            action = key_code_to_action.get(tuple(sorted(game.pressed_keys)), noop)
+            prev_obs = obs
+            obs, rew, terminated, truncated, info = env.step(action)
+            done = terminated or truncated
+            if callback is not None:
+                callback(prev_obs, obs, action, rew, terminated, truncated, info)
+        if obs is not None:
+            rendered = env.render()
+            if isinstance(rendered, list):
+                # List render modes return several frames; the last one is shown.
+                rendered = rendered[-1]
+            assert rendered is not None and isinstance(rendered, np.ndarray)
+            display_arr(
+                game.screen, rendered, transpose=transpose, video_size=game.video_size
+            )
+
+        # process pygame events
+        for event in pygame.event.get():
+            game.process_event(event)
+
+        pygame.display.flip()
+        clock.tick(fps)
+    pygame.quit()
+
+
+class PlayPlot:
+    """Live-plotting helper whose :meth:`callback` can be handed to :func:`play`.
+
+    An instance wraps a user-supplied function that receives the data of a single environment transition:
+        - obs_t: observation before performing action
+        - obs_tp1: observation after performing action
+        - action: action that was executed
+        - rew: reward that was received
+        - terminated: whether the environment is terminated or not
+        - truncated: whether the environment is truncated or not
+        - info: debug info
+
+    and returns a list of metrics computed from that data, one entry per plot.
+    For instance, the function may look like this::
+
+        >>> def compute_metrics(obs_t, obs_tp, action, reward, terminated, truncated, info):
+        ...     return [reward, info["cumulative_reward"], np.linalg.norm(action)]
+
+    :meth:`callback` forwards its arguments to that function, stores the returned values and
+    redraws one scatter plot per metric from the values recorded over the last ``horizon_timesteps`` steps.
+
+    Typically, :meth:`callback` is used together with :func:`play` to watch the metrics evolve while playing::
+
+        >>> plotter = PlayPlot(compute_metrics, horizon_timesteps=200,                               # doctest: +SKIP
+        ...                    plot_names=["Immediate Rew.", "Cumulative Rew.", "Action Magnitude"])
+        >>> play(your_env, callback=plotter.callback)                                                # doctest: +SKIP
+    """
+
+    def __init__(
+        self, callback: Callable, horizon_timesteps: int, plot_names: List[str]
+    ):
+        """Create the figure, one axis per entry of ``plot_names``, and the per-plot data buffers.
+
+        ``callback`` is expected to return a list of metrics of length ``len(plot_names)``.
+
+        Args:
+            callback: Function that computes metrics from environment transitions
+            horizon_timesteps: The time horizon used for the live plots
+            plot_names: List of plot titles
+
+        Raises:
+            DependencyNotInstalled: If matplotlib is not installed
+        """
+        deprecation(
+            "`PlayPlot` is marked as deprecated and will be removed in the near future."
+        )
+        self.data_callback = callback
+        self.horizon_timesteps = horizon_timesteps
+        self.plot_names = plot_names
+
+        if plt is None:
+            raise DependencyNotInstalled(
+                "matplotlib is not installed, run `pip install gymnasium[other]`"
+            )
+
+        plot_count = len(self.plot_names)
+        figure, axes = plt.subplots(plot_count)
+        self.fig = figure
+        # With a single plot `subplots` hands back a bare axis; wrap it so indexing works uniformly
+        self.ax = [axes] if plot_count == 1 else axes
+        for axis, title in zip(self.ax, plot_names):
+            axis.set_title(title)
+
+        self.t = 0
+        self.cur_plot: List[Optional[plt.Axes]] = [None] * plot_count
+        # Bounded buffers: each keeps at most the last `horizon_timesteps` values of its metric
+        self.data = [deque(maxlen=horizon_timesteps) for _ in range(plot_count)]
+
+    def callback(
+        self,
+        obs_t: ObsType,
+        obs_tp1: ObsType,
+        action: ActType,
+        rew: float,
+        terminated: bool,
+        truncated: bool,
+        info: dict,
+    ):
+        """Compute the metrics for one transition, record them and redraw every plot.
+
+        Args:
+            obs_t: The observation at time step t
+            obs_tp1: The observation at time step t+1
+            action: The action
+            rew: The reward
+            terminated: If the environment is terminated
+            truncated: If the environment is truncated
+            info: The information from the environment
+        """
+        metrics = self.data_callback(
+            obs_t, obs_tp1, action, rew, terminated, truncated, info
+        )
+        for value, series in zip(metrics, self.data):
+            series.append(value)
+        self.t += 1
+
+        window_end = self.t
+        window_start = max(0, window_end - self.horizon_timesteps)
+
+        for index, previous in enumerate(self.cur_plot):
+            # Drop the scatter drawn on the previous call before drawing the new one
+            if previous is not None:
+                previous.remove()
+            self.cur_plot[index] = self.ax[index].scatter(
+                range(window_start, window_end), list(self.data[index]), c="blue"
+            )
+            self.ax[index].set_xlim(window_start, window_end)
+
+        if plt is None:
+            raise DependencyNotInstalled(
+                "matplotlib is not installed, run `pip install gymnasium[other]`"
+            )
+        plt.pause(0.000001)
--- a/rl/Lib/site-packages/gymnasium/utils/record_constructor.py
+++ b/rl/Lib/site-packages/gymnasium/utils/record_constructor.py
@ -0,0 +1,33 @@
+"""Allows attributes passed to `RecordConstructorArgs` to be saved. This is used by the `Wrapper.spec` to know the constructor arguments of implemented wrappers."""
+from __future__ import annotations
+
+from copy import deepcopy
+from typing import Any
+
+
+class RecordConstructorArgs:
+    """Stores the keyword arguments given to its constructor in `_saved_kwargs`.
+
+    This can be used to save and reproduce class constructor arguments.
+
+    Note:
+        If two classes inherit from RecordConstructorArgs, the first class to call `RecordConstructorArgs.__init__(self, ...)`
+        has its kwargs saved, with all subsequent `RecordConstructorArgs.__init__` calls being ignored.
+
+        Therefore, always call `RecordConstructorArgs.__init__` before the `Class.__init__`
+    """
+
+    def __init__(self, *, _disable_deepcopy: bool = False, **kwargs: Any):
+        """Stores the keyword arguments in `_saved_kwargs` unless something was already stored.
+
+        Args:
+            _disable_deepcopy: If to not deepcopy the kwargs passed
+            **kwargs: Arguments to save
+        """
+        # Only the first call on an instance records anything (see the class docstring)
+        if hasattr(self, "_saved_kwargs"):
+            return
+
+        recorded = kwargs if _disable_deepcopy is not False else deepcopy(kwargs)
+        self._saved_kwargs: dict[str, Any] = recorded
--- a/rl/Lib/site-packages/gymnasium/utils/save_video.py
+++ b/rl/Lib/site-packages/gymnasium/utils/save_video.py
@ -0,0 +1,109 @@
+"""Utility functions to save rendering videos."""
+import os
+from typing import Callable, Optional
+
+import gymnasium as gym
+from gymnasium import logger
+
+
+try:
+    from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
+except ImportError as e:
+    raise gym.error.DependencyNotInstalled(
+        "moviepy is not installed, run `pip install moviepy`"
+    ) from e
+
+
+def capped_cubic_video_schedule(episode_id: int) -> bool:
+    """The default episode trigger.
+
+    This function will trigger recordings at the perfect-cube episode indices 0, 1, 8, 27, ..., :math:`k^3`, ..., 729
+    and, from episode 1000 onwards, at every multiple of 1000 (1000, 2000, 3000, ...).
+
+    Args:
+        episode_id: The episode number
+
+    Returns:
+        ``True`` if a recording should be triggered for this episode, ``False`` otherwise
+        (negative episode numbers never trigger a recording)
+    """
+    if episode_id < 0:
+        # A negative base raised to a fractional power is complex, which `round` cannot handle
+        return False
+    if episode_id < 1000:
+        # Round the floating point cube root and cube it again to test for a perfect cube
+        return int(round(episode_id ** (1.0 / 3))) ** 3 == episode_id
+    return episode_id % 1000 == 0
+
+
+def save_video(
+    frames: list,
+    video_folder: str,
+    episode_trigger: Optional[Callable[[int], bool]] = None,
+    step_trigger: Optional[Callable[[int], bool]] = None,
+    video_length: Optional[int] = None,
+    name_prefix: str = "rl-video",
+    episode_index: int = 0,
+    step_starting_index: int = 0,
+    **kwargs,
+):
+    """Save videos from rendering frames.
+
+    This function extracts videos from a list of rendered frames of an episode.
+    If neither ``episode_trigger`` nor ``step_trigger`` is given, :func:`capped_cubic_video_schedule`
+    is used as the episode trigger.
+
+    Args:
+        frames (List[RenderFrame]): A list of frames to compose the video. If it is not a list, an error is
+            logged and processing continues.
+        video_folder (str): The folder where the recordings will be stored (created if it does not exist)
+        episode_trigger: Function that accepts an integer and returns ``True`` iff a recording should be started at this episode
+        step_trigger: Function that accepts an integer and returns ``True`` iff a recording should be started at this step
+        video_length (int): The length of recorded episodes. If it isn't specified, the entire episode is recorded.
+            Otherwise, snippets of the specified length are captured.
+        name_prefix (str): Will be prepended to the filename of the recordings.
+        episode_index (int): The index of the current episode.
+        step_starting_index (int): The step index of the first frame.
+        **kwargs: The kwargs that will be passed to moviepy's ImageSequenceClip.
+            You need to specify either fps or duration.
+
+    Example:
+        >>> import gymnasium as gym
+        >>> from gymnasium.utils.save_video import save_video
+        >>> env = gym.make("FrozenLake-v1", render_mode="rgb_array_list")
+        >>> _ = env.reset()
+        >>> step_starting_index = 0
+        >>> episode_index = 0
+        >>> for step_index in range(199): # doctest: +SKIP
+        ...    action = env.action_space.sample()
+        ...    _, _, terminated, truncated, _ = env.step(action)
+        ...
+        ...    if terminated or truncated:
+        ...       save_video(
+        ...          env.render(),
+        ...          "videos",
+        ...          fps=env.metadata["render_fps"],
+        ...          step_starting_index=step_starting_index,
+        ...          episode_index=episode_index
+        ...       )
+        ...       step_starting_index = step_index + 1
+        ...       episode_index += 1
+        ...       env.reset()
+        >>> env.close()
+    """
+    if not isinstance(frames, list):
+        # Reported through the logger only; execution is not aborted here
+        logger.error(f"Expected a list of frames, got a {type(frames)} instead.")
+    if episode_trigger is None and step_trigger is None:
+        episode_trigger = capped_cubic_video_schedule
+
+    video_folder = os.path.abspath(video_folder)
+    os.makedirs(video_folder, exist_ok=True)
+    path_prefix = f"{video_folder}/{name_prefix}"
+
+    if episode_trigger is not None and episode_trigger(episode_index):
+        # `frames[:None]` keeps every frame, so an unset `video_length` records the whole episode
+        clip = ImageSequenceClip(frames[:video_length], **kwargs)
+        clip.write_videofile(f"{path_prefix}-episode-{episode_index}.mp4")
+
+    if step_trigger is not None:
+        # skip the first frame since it comes from reset
+        for step_index, frame_index in enumerate(
+            range(1, len(frames)), start=step_starting_index
+        ):
+            if step_trigger(step_index):
+                end_index = (
+                    frame_index + video_length if video_length is not None else None
+                )
+                clip = ImageSequenceClip(frames[frame_index:end_index], **kwargs)
+                clip.write_videofile(f"{path_prefix}-step-{step_index}.mp4")
--- a/rl/Lib/site-packages/gymnasium/utils/seeding.py
+++ b/rl/Lib/site-packages/gymnasium/utils/seeding.py
@ -0,0 +1,37 @@
+"""Set of random number generator functions: seeding, generator, hashing seeds."""
+from typing import Any, Optional, Tuple
+
+import numpy as np
+
+from gymnasium import error
+
+
+def np_random(seed: Optional[int] = None) -> Tuple[np.random.Generator, Any]:
+    """Creates a random number generator from the seed and returns it together with the seed.
+
+    Args:
+        seed: A non-negative python integer used to create the generator, or ``None``
+
+    Returns:
+        The generator and the ``entropy`` attribute of the ``SeedSequence`` built from ``seed``
+
+    Raises:
+        Error: Seed must be a non-negative integer or omitted
+    """
+    if seed is not None:
+        if not isinstance(seed, int):
+            raise error.Error(
+                f"Seed must be a python integer, actual type: {type(seed)}"
+            )
+        if seed < 0:
+            raise error.Error(
+                f"Seed must be greater or equal to zero, actual value: {seed}"
+            )
+
+    sequence = np.random.SeedSequence(seed)
+    generator = RandomNumberGenerator(np.random.PCG64(sequence))
+    return generator, sequence.entropy
+
+
+# Module-level aliases for numpy's Generator class; `np_random` builds its generator through `RandomNumberGenerator`.
+RNG = RandomNumberGenerator = np.random.Generator
--- a/rl/Lib/site-packages/gymnasium/utils/step_api_compatibility.py
+++ b/rl/Lib/site-packages/gymnasium/utils/step_api_compatibility.py
@ -0,0 +1,169 @@
+"""Contains methods for step compatibility, from old-to-new and new-to-old API."""
+from typing import SupportsFloat, Tuple, Union
+
+import numpy as np
+
+from gymnasium.core import ObsType
+
+
+# Result of the old ("done") step API: (observation, reward, done, info).
+# The ndarray / list alternatives cover the vector-environment case handled by the converters in this module.
+DoneStepType = Tuple[
+    Union[ObsType, np.ndarray],
+    Union[SupportsFloat, np.ndarray],
+    Union[bool, np.ndarray],
+    Union[dict, list],
+]
+
+# Result of the new step API: (observation, reward, terminated, truncated, info).
+TerminatedTruncatedStepType = Tuple[
+    Union[ObsType, np.ndarray],
+    Union[SupportsFloat, np.ndarray],
+    Union[bool, np.ndarray],
+    Union[bool, np.ndarray],
+    Union[dict, list],
+]
+
+
+def convert_to_terminated_truncated_step_api(
+    step_returns: Union[DoneStepType, TerminatedTruncatedStepType],
+    is_vector_env: bool = False,
+) -> TerminatedTruncatedStepType:
+    """Function to transform step returns to new step API irrespective of input API.
+
+    A five-element input is returned unchanged. For a four-element input, the ``"TimeLimit.truncated"``
+    entry is popped from ``infos`` (modifying it in place) and used to split ``done`` into
+    ``terminated`` and ``truncated``; a missing entry is treated as not truncated.
+
+    Args:
+        step_returns (tuple): Items returned by step(). Can be (obs, rew, done, info) or (obs, rew, terminated, truncated, info)
+        is_vector_env (bool): Whether the step_returns are from a vector environment
+
+    Returns:
+        step_returns (tuple): ``(obs, rew, terminated, truncated, info)``
+
+    Raises:
+        TypeError: If ``is_vector_env`` is set and ``infos`` is neither a list nor a dict
+    """
+    if len(step_returns) == 5:
+        return step_returns
+
+    assert len(step_returns) == 4
+    observations, rewards, dones, infos = step_returns
+
+    # Cases to handle - info single env /  info vector env (list) / info vector env (dict)
+    if is_vector_env is False:
+        truncated = infos.pop("TimeLimit.truncated", False)
+        return (
+            observations,
+            rewards,
+            dones and not truncated,
+            dones and truncated,
+            infos,
+        )
+
+    if isinstance(infos, list):
+        # One info dict per sub-environment
+        truncated = np.array(
+            [info.pop("TimeLimit.truncated", False) for info in infos]
+        )
+    elif isinstance(infos, dict):
+        # A single dict holding one array per key
+        num_envs = len(dones)
+        truncated = infos.pop("TimeLimit.truncated", np.zeros(num_envs, dtype=bool))
+    else:
+        raise TypeError(
+            f"Unexpected value of infos, as is_vector_env=True, expects `info` to be a list or dict, actual type: {type(infos)}"
+        )
+
+    return (
+        observations,
+        rewards,
+        np.logical_and(dones, np.logical_not(truncated)),
+        np.logical_and(dones, truncated),
+        infos,
+    )
+
+
+def convert_to_done_step_api(
+    step_returns: Union[TerminatedTruncatedStepType, DoneStepType],
+    is_vector_env: bool = False,
+) -> DoneStepType:
+    """Function to transform step returns to old step API irrespective of input API.
+
+    A four-element input is returned unchanged. For a five-element input, ``terminated`` and ``truncated``
+    are merged into ``done``; whenever an episode ended, ``"TimeLimit.truncated"`` is written into
+    ``infos`` (modifying it in place) and is true only if the episode was truncated without being terminated.
+
+    Args:
+        step_returns (tuple): Items returned by step(). Can be (obs, rew, done, info) or (obs, rew, terminated, truncated, info)
+        is_vector_env (bool): Whether the step_returns are from a vector environment
+
+    Returns:
+        step_returns (tuple): ``(obs, rew, done, info)``
+
+    Raises:
+        TypeError: If ``is_vector_env`` is set and ``infos`` is neither a list nor a dict
+    """
+    if len(step_returns) == 4:
+        return step_returns
+
+    assert len(step_returns) == 5
+    observations, rewards, terminated, truncated, infos = step_returns
+
+    # Cases to handle - info single env /  info vector env (list) / info vector env (dict)
+    if is_vector_env is False:
+        if truncated or terminated:
+            infos["TimeLimit.truncated"] = truncated and not terminated
+        return (
+            observations,
+            rewards,
+            terminated or truncated,
+            infos,
+        )
+
+    if isinstance(infos, list):
+        # One info dict per sub-environment; only the finished ones get the flag
+        for info, env_truncated, env_terminated in zip(infos, truncated, terminated):
+            if env_truncated or env_terminated:
+                info["TimeLimit.truncated"] = env_truncated and not env_terminated
+    elif isinstance(infos, dict):
+        # A single dict holding one array per key; written only if any sub-environment finished
+        if np.logical_or(np.any(truncated), np.any(terminated)):
+            infos["TimeLimit.truncated"] = np.logical_and(
+                truncated, np.logical_not(terminated)
+            )
+    else:
+        raise TypeError(
+            f"Unexpected value of infos, as is_vector_env=True, expects `info` to be a list or dict, actual type: {type(infos)}"
+        )
+
+    return (
+        observations,
+        rewards,
+        np.logical_or(terminated, truncated),
+        infos,
+    )
+
+
+def step_api_compatibility(
+    step_returns: Union[TerminatedTruncatedStepType, DoneStepType],
+    output_truncation_bool: bool = True,
+    is_vector_env: bool = False,
+) -> Union[TerminatedTruncatedStepType, DoneStepType]:
+    """Function to transform step returns to the API selected by ``output_truncation_bool``.
+
+    Done (old) step API refers to step() method returning (observation, reward, done, info)
+    Terminated Truncated (new) step API refers to step() method returning (observation, reward, terminated, truncated, info)
+    (Refer to docs for details on the API change)
+
+    Args:
+        step_returns (tuple): Items returned by step(). Can be (obs, rew, done, info) or (obs, rew, terminated, truncated, info)
+        output_truncation_bool (bool): Whether the output should return two booleans (new API) or one (old) (True by default)
+        is_vector_env (bool): Whether the step_returns are from a vector environment
+
+    Returns:
+        step_returns (tuple): Depending on `output_truncation_bool`, either `(obs, rew, done, info)` or `(obs, rew, terminated, truncated, info)`
+
+    Example:
+        This function can be used to ensure compatibility in step interfaces with conflicting API. Eg. if env is written in old API,
+        wrapper is written in new API, and the final step output is desired to be in old API.
+
+        >>> import gymnasium as gym
+        >>> env = gym.make("CartPole-v0")
+        >>> _ = env.reset()
+        >>> obs, rewards, done, info = step_api_compatibility(env.step(0), output_truncation_bool=False)
+        >>> obs, rewards, terminated, truncated, info = step_api_compatibility(env.step(0), output_truncation_bool=True)
+
+        >>> vec_env = gym.vector.make("CartPole-v0")
+        >>> _ = vec_env.reset()
+        >>> obs, rewards, dones, infos = step_api_compatibility(vec_env.step([0]), is_vector_env=True, output_truncation_bool=False)
+        >>> obs, rewards, terminated, truncated, info = step_api_compatibility(vec_env.step([0]), is_vector_env=True, output_truncation_bool=True)
+    """
+    # Pick the converter for the requested output format, then delegate to it
+    converter = (
+        convert_to_terminated_truncated_step_api
+        if output_truncation_bool
+        else convert_to_done_step_api
+    )
+    return converter(step_returns, is_vector_env)