I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
from gymnasium.envs.classic_control.acrobot import AcrobotEnv
from gymnasium.envs.classic_control.cartpole import CartPoleEnv
from gymnasium.envs.classic_control.continuous_mountain_car import (
Continuous_MountainCarEnv,
)
from gymnasium.envs.classic_control.mountain_car import MountainCarEnv
from gymnasium.envs.classic_control.pendulum import PendulumEnv

View File

@@ -0,0 +1,470 @@
"""classic Acrobot task"""
from typing import Optional
import numpy as np
from numpy import cos, pi, sin
import gymnasium as gym
from gymnasium import Env, spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = [
"Alborz Geramifard",
"Robert H. Klein",
"Christoph Dann",
"William Dabney",
"Jonathan P. How",
]
__license__ = "BSD 3-Clause"
__author__ = "Christoph Dann <cdann@cdann.de>"
# SOURCE:
# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
class AcrobotEnv(Env):
"""
## Description
The Acrobot environment is based on Sutton's work in
["Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding"](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html)
and [Sutton and Barto's book](http://www.incompleteideas.net/book/the-book-2nd.html).
The system consists of two links connected linearly to form a chain, with one end of
the chain fixed. The joint between the two links is actuated. The goal is to apply
torques on the actuated joint to swing the free end of the linear chain above a
given height while starting from the initial state of hanging downwards.
As seen in the **Gif**: two blue links connected by two green joints. The joint in
between the two links is actuated. The goal is to swing the free end of the outer-link
to reach the target height (black horizontal line above system) by applying torque on
the actuator.
## Action Space
The action is discrete, deterministic, and represents the torque applied on the actuated
joint between the two links.
| Num | Action | Unit |
|-----|---------------------------------------|--------------|
| 0 | apply -1 torque to the actuated joint | torque (N m) |
| 1 | apply 0 torque to the actuated joint | torque (N m) |
| 2 | apply 1 torque to the actuated joint | torque (N m) |
## Observation Space
The observation is a `ndarray` with shape `(6,)` that provides information about the
two rotational joint angles as well as their angular velocities:
| Num | Observation | Min | Max |
|-----|------------------------------|---------------------|-------------------|
| 0 | Cosine of `theta1` | -1 | 1 |
| 1 | Sine of `theta1` | -1 | 1 |
| 2 | Cosine of `theta2` | -1 | 1 |
| 3 | Sine of `theta2` | -1 | 1 |
| 4 | Angular velocity of `theta1` | ~ -12.567 (-4 * pi) | ~ 12.567 (4 * pi) |
| 5 | Angular velocity of `theta2` | ~ -28.274 (-9 * pi) | ~ 28.274 (9 * pi) |
where
- `theta1` is the angle of the first joint, where an angle of 0 indicates the first link is pointing directly
downwards.
- `theta2` is ***relative to the angle of the first link.***
An angle of 0 corresponds to having the same angle between the two links.
The angular velocities of `theta1` and `theta2` are bounded at ±4π, and ±9π rad/s respectively.
A state of `[1, 0, 1, 0, ..., ...]` indicates that both links are pointing downwards.
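Since the observation only exposes the sine and cosine of each joint angle, the underlying angles can be recovered with `np.arctan2`. A minimal sketch, assuming an observation laid out as in the table above (the helper name is illustrative):
```python
import numpy as np

def angles_from_obs(obs):
    # obs = [cos(theta1), sin(theta1), cos(theta2), sin(theta2), dtheta1, dtheta2]
    theta1 = np.arctan2(obs[1], obs[0])  # angle of the first link
    theta2 = np.arctan2(obs[3], obs[2])  # angle of the second link, relative to the first
    return theta1, theta2
```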
## Rewards
The goal is to have the free end reach a designated target height in as few steps as possible,
and as such all steps that do not reach the goal incur a reward of -1.
Achieving the target height results in termination with a reward of 0. The reward threshold is -100.
## Starting State
Each parameter in the underlying state (`theta1`, `theta2`, and the two angular velocities) is initialized
uniformly between -0.1 and 0.1. This means both links are pointing downwards with some initial stochasticity.
## Episode End
The episode ends if one of the following occurs:
1. Termination: The free end reaches the target height, which is constructed as:
`-cos(theta1) - cos(theta2 + theta1) > 1.0`
2. Truncation: Episode length is greater than 500 (200 for v0)
## Arguments
No additional arguments are currently supported during construction.
```python
import gymnasium as gym
env = gym.make('Acrobot-v1')
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
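For example, a sketch that widens the default `[-0.1, 0.1]` sampling range (the `low`/`high` keys are the ones parsed by `utils.maybe_parse_reset_bounds`; the values here are arbitrary):
```python
import gymnasium as gym

env = gym.make('Acrobot-v1')
# Sample each underlying state variable uniformly from [-0.2, 0.2] instead of [-0.1, 0.1]
observation, info = env.reset(seed=42, options={"low": -0.2, "high": 0.2})
```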
By default, the dynamics of the acrobot follow those described in Sutton and Barto's book
[Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html).
However, a `book_or_nips` parameter can be modified to change the pendulum dynamics to those described
in the original [NeurIPS paper](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html).
```python
# To change the dynamics as described above
env.unwrapped.book_or_nips = 'nips'
```
See the following note for details:
> The dynamics equations were missing some terms in the NIPS paper which
are present in the book. R. Sutton confirmed in personal correspondence
that the experimental results shown in the paper and the book were
generated with the equations shown in the book.
However, there is the option to run the domain with the paper equations
by setting `book_or_nips = 'nips'`
## Version History
- v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of
`theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the
sine and cosine of each angle instead.
- v0: Initial versions release (1.0.0) (removed from gymnasium for v1)
## References
- Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
In D. Touretzky, M. C. Mozer, & M. Hasselmo (Eds.), Advances in Neural Information Processing Systems (Vol. 8).
MIT Press. https://proceedings.neurips.cc/paper/1995/file/8f1d43620bc6bb580df6e80b0dc05c48-Paper.pdf
- Sutton, R. S., Barto, A. G. (2018). Reinforcement Learning: An Introduction. The MIT Press.
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 15,
}
dt = 0.2
LINK_LENGTH_1 = 1.0 # [m]
LINK_LENGTH_2 = 1.0 # [m]
LINK_MASS_1 = 1.0 #: [kg] mass of link 1
LINK_MASS_2 = 1.0 #: [kg] mass of link 2
LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1
LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2
LINK_MOI = 1.0 #: moments of inertia for both links
MAX_VEL_1 = 4 * pi
MAX_VEL_2 = 9 * pi
AVAIL_TORQUE = [-1.0, 0.0, +1]
torque_noise_max = 0.0
SCREEN_DIM = 500
#: use dynamics equations from the nips paper or the book
book_or_nips = "book"
action_arrow = None
domain_fig = None
actions_num = 3
def __init__(self, render_mode: Optional[str] = None):
self.render_mode = render_mode
self.screen = None
self.clock = None
self.isopen = True
high = np.array(
[1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32
)
low = -high
self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
self.action_space = spaces.Discrete(3)
self.state = None
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
low, high = utils.maybe_parse_reset_bounds(
options, -0.1, 0.1 # default low
) # default high
self.state = self.np_random.uniform(low=low, high=high, size=(4,)).astype(
np.float32
)
if self.render_mode == "human":
self.render()
return self._get_ob(), {}
def step(self, a):
s = self.state
assert s is not None, "Call reset before using AcrobotEnv object."
torque = self.AVAIL_TORQUE[a]
# Add noise to the force action
if self.torque_noise_max > 0:
torque += self.np_random.uniform(
-self.torque_noise_max, self.torque_noise_max
)
# Now, augment the state with our force action so it can be passed to
# _dsdt
s_augmented = np.append(s, torque)
ns = rk4(self._dsdt, s_augmented, [0, self.dt])
ns[0] = wrap(ns[0], -pi, pi)
ns[1] = wrap(ns[1], -pi, pi)
ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
self.state = ns
terminated = self._terminal()
reward = -1.0 if not terminated else 0.0
if self.render_mode == "human":
self.render()
return (self._get_ob(), reward, terminated, False, {})
def _get_ob(self):
s = self.state
assert s is not None, "Call reset before using AcrobotEnv object."
return np.array(
[cos(s[0]), sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]], dtype=np.float32
)
def _terminal(self):
s = self.state
assert s is not None, "Call reset before using AcrobotEnv object."
return bool(-cos(s[0]) - cos(s[1] + s[0]) > 1.0)
def _dsdt(self, s_augmented):
m1 = self.LINK_MASS_1
m2 = self.LINK_MASS_2
l1 = self.LINK_LENGTH_1
lc1 = self.LINK_COM_POS_1
lc2 = self.LINK_COM_POS_2
I1 = self.LINK_MOI
I2 = self.LINK_MOI
g = 9.8
a = s_augmented[-1]
s = s_augmented[:-1]
theta1 = s[0]
theta2 = s[1]
dtheta1 = s[2]
dtheta2 = s[3]
d1 = (
m1 * lc1**2
+ m2 * (l1**2 + lc2**2 + 2 * l1 * lc2 * cos(theta2))
+ I1
+ I2
)
d2 = m2 * (lc2**2 + l1 * lc2 * cos(theta2)) + I2
phi2 = m2 * lc2 * g * cos(theta1 + theta2 - pi / 2.0)
phi1 = (
-m2 * l1 * lc2 * dtheta2**2 * sin(theta2)
- 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * sin(theta2)
+ (m1 * lc1 + m2 * l1) * g * cos(theta1 - pi / 2)
+ phi2
)
if self.book_or_nips == "nips":
# the following line is consistent with the description in the
# paper
ddtheta2 = (a + d2 / d1 * phi1 - phi2) / (m2 * lc2**2 + I2 - d2**2 / d1)
else:
# the following line is consistent with the java implementation and the
# book
ddtheta2 = (
a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1**2 * sin(theta2) - phi2
) / (m2 * lc2**2 + I2 - d2**2 / d1)
ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.SCREEN_DIM, self.SCREEN_DIM)
)
else: # mode in "rgb_array"
self.screen = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM))
if self.clock is None:
self.clock = pygame.time.Clock()
surf = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM))
surf.fill((255, 255, 255))
s = self.state
bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default
scale = self.SCREEN_DIM / (bound * 2)
offset = self.SCREEN_DIM / 2
if s is None:
return None
p1 = [
-self.LINK_LENGTH_1 * cos(s[0]) * scale,
self.LINK_LENGTH_1 * sin(s[0]) * scale,
]
p2 = [
p1[0] - self.LINK_LENGTH_2 * cos(s[0] + s[1]) * scale,
p1[1] + self.LINK_LENGTH_2 * sin(s[0] + s[1]) * scale,
]
xys = np.array([[0, 0], p1, p2])[:, ::-1]
thetas = [s[0] - pi / 2, s[0] + s[1] - pi / 2]
link_lengths = [self.LINK_LENGTH_1 * scale, self.LINK_LENGTH_2 * scale]
pygame.draw.line(
surf,
start_pos=(-2.2 * scale + offset, 1 * scale + offset),
end_pos=(2.2 * scale + offset, 1 * scale + offset),
color=(0, 0, 0),
)
for (x, y), th, llen in zip(xys, thetas, link_lengths):
x = x + offset
y = y + offset
l, r, t, b = 0, llen, 0.1 * scale, -0.1 * scale
coords = [(l, b), (l, t), (r, t), (r, b)]
transformed_coords = []
for coord in coords:
coord = pygame.math.Vector2(coord).rotate_rad(th)
coord = (coord[0] + x, coord[1] + y)
transformed_coords.append(coord)
gfxdraw.aapolygon(surf, transformed_coords, (0, 204, 204))
gfxdraw.filled_polygon(surf, transformed_coords, (0, 204, 204))
gfxdraw.aacircle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0))
gfxdraw.filled_circle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0))
surf = pygame.transform.flip(surf, False, True)
self.screen.blit(surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False
def wrap(x, m, M):
"""Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
Args:
x: a scalar
m: minimum possible value in range
M: maximum possible value in range
Returns:
x: a scalar, wrapped
"""
diff = M - m
while x > M:
x = x - diff
while x < m:
x = x + diff
return x
def bound(x, m, M=None):
"""Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
Args:
x: scalar
m: The lower bound
M: The upper bound
Returns:
x: scalar, bound between min (m) and Max (M)
"""
if M is None:
M = m[1]
m = m[0]
# bound x between min (m) and Max (M)
return min(max(x, m), M)
def rk4(derivs, y0, t):
"""
Integrate 1-D or N-D system of ODEs using 4-th order Runge-Kutta.
Example for 2D system:
>>> def derivs(x):
... d1 = x[0] + 2*x[1]
... d2 = -3*x[0] + 4*x[1]
... return d1, d2
>>> dt = 0.0005
>>> t = np.arange(0.0, 2.0, dt)
>>> y0 = (1,2)
>>> yout = rk4(derivs, y0, t)
Args:
derivs: the derivative of the system and has the signature ``dy = derivs(yi)``
y0: initial state vector
t: sample times
Returns:
yout: Runge-Kutta approximation of the ODE
"""
try:
Ny = len(y0)
except TypeError:
yout = np.zeros((len(t),), np.float_)
else:
yout = np.zeros((len(t), Ny), np.float_)
yout[0] = y0
for i in np.arange(len(t) - 1):
this = t[i]
dt = t[i + 1] - this
dt2 = dt / 2.0
y0 = yout[i]
k1 = np.asarray(derivs(y0))
k2 = np.asarray(derivs(y0 + dt2 * k1))
k3 = np.asarray(derivs(y0 + dt2 * k2))
k4 = np.asarray(derivs(y0 + dt * k3))
yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
# We only care about the final timestep and we cleave off action value which will be zero
return yout[-1][:4]

Binary file not shown.

After: 6.8 KiB

View File

@@ -0,0 +1,34 @@
[remap]
importer="texture"
type="CompressedTexture2D"
uid="uid://dk5m5liwecsk"
path="res://.godot/imported/clockwise.png-0b6c4e15d302e93ea5c480ebf7734edb.ctex"
metadata={
"vram_texture": false
}
[deps]
source_file="res://rl/Lib/site-packages/gymnasium/envs/classic_control/assets/clockwise.png"
dest_files=["res://.godot/imported/clockwise.png-0b6c4e15d302e93ea5c480ebf7734edb.ctex"]
[params]
compress/mode=0
compress/high_quality=false
compress/lossy_quality=0.7
compress/hdr_compression=1
compress/normal_map=0
compress/channel_pack=0
mipmaps/generate=false
mipmaps/limit=-1
roughness/mode=0
roughness/src_normal=""
process/fix_alpha_border=true
process/premult_alpha=false
process/normal_map_invert_y=false
process/hdr_as_srgb=false
process/hdr_clamp_exposure=false
process/size_limit=0
detect_3d/compress_to=1

View File

@@ -0,0 +1,577 @@
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
"""
import math
from typing import Optional, Tuple, Union
import numpy as np
import gymnasium as gym
from gymnasium import logger, spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
from gymnasium.experimental.vector import VectorEnv
from gymnasium.vector.utils import batch_space
class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
"""
## Description
This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson in
["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077).
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces
in the left and right direction on the cart.
## Action Space
The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction
of the fixed force the cart is pushed with.
- 0: Push cart to the left
- 1: Push cart to the right
**Note**: The velocity that is reduced or increased by the applied force is not fixed; it depends on the angle
the pole is pointing, since the pole's center of gravity changes the amount of energy needed to move the cart underneath it.
## Observation Space
The observation is a `ndarray` with shape `(4,)` with the values corresponding to the following positions and velocities:
| Num | Observation | Min | Max |
|-----|-----------------------|---------------------|-------------------|
| 0 | Cart Position | -4.8 | 4.8 |
| 1 | Cart Velocity | -Inf | Inf |
| 2 | Pole Angle | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
| 3 | Pole Angular Velocity | -Inf | Inf |
**Note:** While the ranges above denote the possible values for each element of the observation space,
they are not reflective of the allowed values of the state space in an unterminated episode. Particularly:
- The cart x-position (index 0) can take values between `(-4.8, 4.8)`, but the episode terminates
if the cart leaves the `(-2.4, 2.4)` range.
- The pole angle can be observed between `(-.418, .418)` radians (or **±24°**), but the episode terminates
if the pole angle is not in the range `(-.2095, .2095)` (or **±12°**)
## Rewards
Since the goal is to keep the pole upright for as long as possible, a reward of `+1` for every step taken,
including the termination step, is allotted. The threshold for rewards is 500 for v1 and 200 for v0.
## Starting State
All observations are assigned a uniformly random value in `(-0.05, 0.05)`
## Episode End
The episode ends if any one of the following occurs:
1. Termination: Pole Angle is greater than ±12°
2. Termination: Cart Position is greater than ±2.4 (center of the cart reaches the edge of the display)
3. Truncation: Episode length is greater than 500 (200 for v0)
## Arguments
```python
import gymnasium as gym
gym.make('CartPole-v1')
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
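A short sketch combining `gym.make`, a reset with custom bounds, and the standard interaction loop (the bound values are arbitrary):
```python
import gymnasium as gym

env = gym.make('CartPole-v1')
# Narrow the initial-state distribution from the default (-0.05, 0.05)
observation, info = env.reset(seed=0, options={"low": -0.02, "high": 0.02})
for _ in range(500):
    action = env.action_space.sample()  # 0: push cart to the left, 1: push cart to the right
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        observation, info = env.reset()
env.close()
```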
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 50,
}
def __init__(self, render_mode: Optional[str] = None):
self.gravity = 9.8
self.masscart = 1.0
self.masspole = 0.1
self.total_mass = self.masspole + self.masscart
self.length = 0.5 # actually half the pole's length
self.polemass_length = self.masspole * self.length
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
self.kinematics_integrator = "euler"
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4
# Angle limit set to 2 * theta_threshold_radians so failing observation
# is still within bounds.
high = np.array(
[
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max,
],
dtype=np.float32,
)
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
self.render_mode = render_mode
self.screen_width = 600
self.screen_height = 400
self.screen = None
self.clock = None
self.isopen = True
self.state = None
self.steps_beyond_terminated = None
def step(self, action):
assert self.action_space.contains(
action
), f"{action!r} ({type(action)}) invalid"
assert self.state is not None, "Call reset before using step method."
x, x_dot, theta, theta_dot = self.state
force = self.force_mag if action == 1 else -self.force_mag
costheta = math.cos(theta)
sintheta = math.sin(theta)
# For the interested reader:
# https://coneural.org/florian/papers/05_cart_pole.pdf
temp = (
force + self.polemass_length * theta_dot**2 * sintheta
) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta * temp) / (
self.length * (4.0 / 3.0 - self.masspole * costheta**2 / self.total_mass)
)
xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
if self.kinematics_integrator == "euler":
x = x + self.tau * x_dot
x_dot = x_dot + self.tau * xacc
theta = theta + self.tau * theta_dot
theta_dot = theta_dot + self.tau * thetaacc
else: # semi-implicit euler
x_dot = x_dot + self.tau * xacc
x = x + self.tau * x_dot
theta_dot = theta_dot + self.tau * thetaacc
theta = theta + self.tau * theta_dot
self.state = (x, x_dot, theta, theta_dot)
terminated = bool(
x < -self.x_threshold
or x > self.x_threshold
or theta < -self.theta_threshold_radians
or theta > self.theta_threshold_radians
)
if not terminated:
reward = 1.0
elif self.steps_beyond_terminated is None:
# Pole just fell!
self.steps_beyond_terminated = 0
reward = 1.0
else:
if self.steps_beyond_terminated == 0:
logger.warn(
"You are calling 'step()' even though this "
"environment has already returned terminated = True. You "
"should always call 'reset()' once you receive 'terminated = "
"True' -- any further steps are undefined behavior."
)
self.steps_beyond_terminated += 1
reward = 0.0
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
low, high = utils.maybe_parse_reset_bounds(
options, -0.05, 0.05 # default low
) # default high
self.state = self.np_random.uniform(low=low, high=high, size=(4,))
self.steps_beyond_terminated = None
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.screen_width, self.screen_height)
)
else: # mode == "rgb_array"
self.screen = pygame.Surface((self.screen_width, self.screen_height))
if self.clock is None:
self.clock = pygame.time.Clock()
world_width = self.x_threshold * 2
scale = self.screen_width / world_width
polewidth = 10.0
polelen = scale * (2 * self.length)
cartwidth = 50.0
cartheight = 30.0
if self.state is None:
return None
x = self.state
self.surf = pygame.Surface((self.screen_width, self.screen_height))
self.surf.fill((255, 255, 255))
l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
axleoffset = cartheight / 4.0
cartx = x[0] * scale + self.screen_width / 2.0 # MIDDLE OF CART
carty = 100 # TOP OF CART
cart_coords = [(l, b), (l, t), (r, t), (r, b)]
cart_coords = [(c[0] + cartx, c[1] + carty) for c in cart_coords]
gfxdraw.aapolygon(self.surf, cart_coords, (0, 0, 0))
gfxdraw.filled_polygon(self.surf, cart_coords, (0, 0, 0))
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
)
pole_coords = []
for coord in [(l, b), (l, t), (r, t), (r, b)]:
coord = pygame.math.Vector2(coord).rotate_rad(-x[2])
coord = (coord[0] + cartx, coord[1] + carty + axleoffset)
pole_coords.append(coord)
gfxdraw.aapolygon(self.surf, pole_coords, (202, 152, 101))
gfxdraw.filled_polygon(self.surf, pole_coords, (202, 152, 101))
gfxdraw.aacircle(
self.surf,
int(cartx),
int(carty + axleoffset),
int(polewidth / 2),
(129, 132, 203),
)
gfxdraw.filled_circle(
self.surf,
int(cartx),
int(carty + axleoffset),
int(polewidth / 2),
(129, 132, 203),
)
gfxdraw.hline(self.surf, 0, self.screen_width, carty, (0, 0, 0))
self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False
class CartPoleVectorEnv(VectorEnv):
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 50,
}
def __init__(
self,
num_envs: int = 2,
max_episode_steps: int = 500,
render_mode: Optional[str] = None,
):
super().__init__()
self.num_envs = num_envs
self.gravity = 9.8
self.masscart = 1.0
self.masspole = 0.1
self.total_mass = self.masspole + self.masscart
self.length = 0.5 # actually half the pole's length
self.polemass_length = self.masspole * self.length
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
self.kinematics_integrator = "euler"
self.max_episode_steps = max_episode_steps
self.steps = np.zeros(num_envs, dtype=np.int32)
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4
# Angle limit set to 2 * theta_threshold_radians so failing observation
# is still within bounds.
high = np.array(
[
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max,
],
dtype=np.float32,
)
self.low = -0.05
self.high = 0.05
self.single_action_space = spaces.Discrete(2)
self.action_space = batch_space(self.single_action_space, num_envs)
self.single_observation_space = spaces.Box(-high, high, dtype=np.float32)
self.observation_space = batch_space(self.single_observation_space, num_envs)
self.render_mode = render_mode
self.screen_width = 600
self.screen_height = 400
self.screens = None
self.clocks = None
self.isopen = True
self.state = None
self.steps_beyond_terminated = None
def step(
self, action: np.ndarray
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]:
assert self.action_space.contains(
action
), f"{action!r} ({type(action)}) invalid"
assert self.state is not None, "Call reset before using step method."
x, x_dot, theta, theta_dot = self.state
force = np.sign(action - 0.5) * self.force_mag
costheta = np.cos(theta)
sintheta = np.sin(theta)
# For the interested reader:
# https://coneural.org/florian/papers/05_cart_pole.pdf
temp = (
force + self.polemass_length * theta_dot**2 * sintheta
) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta * temp) / (
self.length * (4.0 / 3.0 - self.masspole * costheta**2 / self.total_mass)
)
xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
if self.kinematics_integrator == "euler":
x = x + self.tau * x_dot
x_dot = x_dot + self.tau * xacc
theta = theta + self.tau * theta_dot
theta_dot = theta_dot + self.tau * thetaacc
else: # semi-implicit euler
x_dot = x_dot + self.tau * xacc
x = x + self.tau * x_dot
theta_dot = theta_dot + self.tau * thetaacc
theta = theta + self.tau * theta_dot
self.state = np.stack((x, x_dot, theta, theta_dot))
terminated: np.ndarray = (
(x < -self.x_threshold)
| (x > self.x_threshold)
| (theta < -self.theta_threshold_radians)
| (theta > self.theta_threshold_radians)
)
self.steps += 1
truncated = self.steps >= self.max_episode_steps
done = terminated | truncated
if any(done):
# This code was generated by copilot, need to check if it works
self.state[:, done] = self.np_random.uniform(
low=self.low, high=self.high, size=(4, done.sum())
).astype(np.float32)
self.steps[done] = 0
reward = np.ones_like(terminated, dtype=np.float32)
if self.render_mode == "human":
self.render()
return self.state.T, reward, terminated, truncated, {}
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
self.low, self.high = utils.maybe_parse_reset_bounds(
options, -0.05, 0.05 # default low
) # default high
self.state = self.np_random.uniform(
low=self.low, high=self.high, size=(4, self.num_envs)
).astype(np.float32)
self.steps_beyond_terminated = None
if self.render_mode == "human":
self.render()
return self.state.T, {}
def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic_control]`"
)
if self.screens is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screens = [
pygame.display.set_mode((self.screen_width, self.screen_height))
for _ in range(self.num_envs)
]
else: # mode == "rgb_array"
self.screens = [
pygame.Surface((self.screen_width, self.screen_height))
for _ in range(self.num_envs)
]
if self.clocks is None:
self.clocks = [pygame.time.Clock() for _ in range(self.num_envs)]
world_width = self.x_threshold * 2
scale = self.screen_width / world_width
polewidth = 10.0
polelen = scale * (2 * self.length)
cartwidth = 50.0
cartheight = 30.0
if self.state is None:
return None
for state, screen, clock in zip(self.state.T, self.screens, self.clocks):
x = state
self.surf = pygame.Surface((self.screen_width, self.screen_height))
self.surf.fill((255, 255, 255))
l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
axleoffset = cartheight / 4.0
cartx = x[0] * scale + self.screen_width / 2.0 # MIDDLE OF CART
carty = 100 # TOP OF CART
cart_coords = [(l, b), (l, t), (r, t), (r, b)]
cart_coords = [(c[0] + cartx, c[1] + carty) for c in cart_coords]
gfxdraw.aapolygon(self.surf, cart_coords, (0, 0, 0))
gfxdraw.filled_polygon(self.surf, cart_coords, (0, 0, 0))
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
)
pole_coords = []
for coord in [(l, b), (l, t), (r, t), (r, b)]:
coord = pygame.math.Vector2(coord).rotate_rad(-x[2])
coord = (coord[0] + cartx, coord[1] + carty + axleoffset)
pole_coords.append(coord)
gfxdraw.aapolygon(self.surf, pole_coords, (202, 152, 101))
gfxdraw.filled_polygon(self.surf, pole_coords, (202, 152, 101))
gfxdraw.aacircle(
self.surf,
int(cartx),
int(carty + axleoffset),
int(polewidth / 2),
(129, 132, 203),
)
gfxdraw.filled_circle(
self.surf,
int(cartx),
int(carty + axleoffset),
int(polewidth / 2),
(129, 132, 203),
)
gfxdraw.hline(self.surf, 0, self.screen_width, carty, (0, 0, 0))
self.surf = pygame.transform.flip(self.surf, False, True)
screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
[clock.tick(self.metadata["render_fps"]) for clock in self.clocks]
pygame.display.flip()
elif self.render_mode == "rgb_array":
return [
np.transpose(
np.array(pygame.surfarray.pixels3d(screen)), axes=(1, 0, 2)
)
for screen in self.screens
]
def close(self):
if self.screens is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False

View File

@@ -0,0 +1,304 @@
"""
@author: Olivier Sigaud
A merge between two sources:
* Adaptation of the MountainCar Environment from the "FAReinforcement" library
of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia
* the gymnasium MountainCar environment
itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
from typing import Optional
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
class Continuous_MountainCarEnv(gym.Env):
"""
## Description
The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
that can be applied to the car in either direction. The goal of the MDP is to strategically
accelerate the car to reach the goal state on top of the right hill. There are two versions
of the mountain car domain in gymnasium: one with discrete actions and one with continuous.
This version is the one with continuous actions.
This MDP first appeared in [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)
```
@TECHREPORT{Moore90efficientmemory-based,
author = {Andrew William Moore},
title = {Efficient Memory-based Learning for Robot Control},
institution = {University of Cambridge},
year = {1990}
}
```
## Observation Space
The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
| Num | Observation | Min | Max | Unit |
|-----|--------------------------------------|------|-----|--------------|
| 0 | position of the car along the x-axis | -Inf | Inf | position (m) |
| 1 | velocity of the car | -Inf | Inf | velocity (v) |
## Action Space
The action is a `ndarray` with shape `(1,)`, representing the directional force applied on the car.
The action is clipped in the range `[-1,1]` and multiplied by a power of 0.0015.
## Transition Dynamics:
Given an action, the mountain car follows the following transition dynamics:
*velocity<sub>t+1</sub> = velocity<sub>t</sub> + force * power - 0.0025 * cos(3 * position<sub>t</sub>)*
*position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
where force is the action clipped to the range `[-1,1]` and power is a constant 0.0015.
The collisions at either end are inelastic with the velocity set to 0 upon collision with the wall.
The position is clipped to the range [-1.2, 0.6] and velocity is clipped to the range [-0.07, 0.07].
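A minimal sketch of one transition in plain Python, mirroring the equations above (constants copied from this file; the helper name is illustrative):
```python
import math

POWER = 0.0015
MIN_POSITION, MAX_POSITION, MAX_SPEED = -1.2, 0.6, 0.07

def transition(position, velocity, action):
    force = min(max(action, -1.0), 1.0)
    velocity += force * POWER - 0.0025 * math.cos(3 * position)
    velocity = min(max(velocity, -MAX_SPEED), MAX_SPEED)
    position += velocity
    position = min(max(position, MIN_POSITION), MAX_POSITION)
    if position == MIN_POSITION and velocity < 0:
        velocity = 0.0  # inelastic collision with the left wall
    return position, velocity
```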
## Reward
A negative reward of *-0.1 * action<sup>2</sup>* is received at each timestep to penalise for
taking actions of large magnitude. If the mountain car reaches the goal then a positive reward of +100
is added to the negative reward for that timestep.
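As a one-line sketch of the per-step reward (goal bonus when terminated, quadratic action penalty otherwise):
```python
def reward(action, terminated):
    return (100.0 if terminated else 0.0) - 0.1 * action ** 2
```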
## Starting State
The position of the car is assigned a uniform random value in `[-0.6 , -0.4]`.
The starting velocity of the car is always assigned to 0.
## Episode End
The episode ends if either of the following happens:
1. Termination: The position of the car is greater than or equal to 0.45 (the goal position on top of the right hill)
2. Truncation: The length of the episode is 999.
## Arguments
```python
import gymnasium as gym
gym.make('MountainCarContinuous-v0')
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
## Version History
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 30,
}
def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
self.min_action = -1.0
self.max_action = 1.0
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = (
0.45 # was 0.5 in gymnasium, 0.45 in Arnaud de Broissia's version
)
self.goal_velocity = goal_velocity
self.power = 0.0015
self.low_state = np.array(
[self.min_position, -self.max_speed], dtype=np.float32
)
self.high_state = np.array(
[self.max_position, self.max_speed], dtype=np.float32
)
self.render_mode = render_mode
self.screen_width = 600
self.screen_height = 400
self.screen = None
self.clock = None
self.isopen = True
self.action_space = spaces.Box(
low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
)
self.observation_space = spaces.Box(
low=self.low_state, high=self.high_state, dtype=np.float32
)
def step(self, action: np.ndarray):
position = self.state[0]
velocity = self.state[1]
force = min(max(action[0], self.min_action), self.max_action)
velocity += force * self.power - 0.0025 * math.cos(3 * position)
if velocity > self.max_speed:
velocity = self.max_speed
if velocity < -self.max_speed:
velocity = -self.max_speed
position += velocity
if position > self.max_position:
position = self.max_position
if position < self.min_position:
position = self.min_position
if position == self.min_position and velocity < 0:
velocity = 0
# Convert a possible numpy bool to a Python bool.
terminated = bool(
position >= self.goal_position and velocity >= self.goal_velocity
)
reward = 0
if terminated:
reward = 100.0
reward -= math.pow(action[0], 2) * 0.1
self.state = np.array([position, velocity], dtype=np.float32)
if self.render_mode == "human":
self.render()
return self.state, reward, terminated, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
self.state = np.array([self.np_random.uniform(low=low, high=high), 0])
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}
def _height(self, xs):
return np.sin(3 * xs) * 0.45 + 0.55
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.screen_width, self.screen_height)
)
else: # mode == "rgb_array":
self.screen = pygame.Surface((self.screen_width, self.screen_height))
if self.clock is None:
self.clock = pygame.time.Clock()
world_width = self.max_position - self.min_position
scale = self.screen_width / world_width
carwidth = 40
carheight = 20
self.surf = pygame.Surface((self.screen_width, self.screen_height))
self.surf.fill((255, 255, 255))
pos = self.state[0]
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs - self.min_position) * scale, ys * scale))
pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0))
clearance = 10
l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
coords = []
for c in [(l, b), (l, t), (r, t), (r, b)]:
c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
coords.append(
(
c[0] + (pos - self.min_position) * scale,
c[1] + clearance + self._height(pos) * scale,
)
)
gfxdraw.aapolygon(self.surf, coords, (0, 0, 0))
gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0))
for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]:
c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
wheel = (
int(c[0] + (pos - self.min_position) * scale),
int(c[1] + clearance + self._height(pos) * scale),
)
gfxdraw.aacircle(
self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
)
gfxdraw.filled_circle(
self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
)
flagx = int((self.goal_position - self.min_position) * scale)
flagy1 = int(self._height(self.goal_position) * scale)
flagy2 = flagy1 + 50
gfxdraw.vline(self.surf, flagx, flagy1, flagy2, (0, 0, 0))
gfxdraw.aapolygon(
self.surf,
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
(204, 204, 0),
)
gfxdraw.filled_polygon(
self.surf,
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
(204, 204, 0),
)
self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False

View File

@@ -0,0 +1,284 @@
"""
http://incompleteideas.net/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
from typing import Optional
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
class MountainCarEnv(gym.Env):
"""
## Description
The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
that can be applied to the car in either direction. The goal of the MDP is to strategically
accelerate the car to reach the goal state on top of the right hill. There are two versions
of the mountain car domain in gymnasium: one with discrete actions and one with continuous.
This version is the one with discrete actions.
This MDP first appeared in [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)
```
@TECHREPORT{Moore90efficientmemory-based,
author = {Andrew William Moore},
title = {Efficient Memory-based Learning for Robot Control},
institution = {University of Cambridge},
year = {1990}
}
```
## Observation Space
The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
| Num | Observation | Min | Max | Unit |
|-----|--------------------------------------|-------|------|--------------|
| 0 | position of the car along the x-axis | -1.2 | 0.6 | position (m) |
| 1 | velocity of the car | -0.07 | 0.07 | velocity (v) |
## Action Space
There are 3 discrete deterministic actions:
- 0: Accelerate to the left
- 1: Don't accelerate
- 2: Accelerate to the right
## Transition Dynamics:
Given an action, the mountain car follows the following transition dynamics:
*velocity<sub>t+1</sub> = velocity<sub>t</sub> + (action - 1) * force - cos(3 * position<sub>t</sub>) * gravity*
*position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
where force = 0.001 and gravity = 0.0025. The collisions at either end are inelastic with the velocity set to 0
upon collision with the wall. The position is clipped to the range `[-1.2, 0.6]` and
velocity is clipped to the range `[-0.07, 0.07]`.
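The same update written as a small sketch (force and gravity taken from this file; the helper name is illustrative):
```python
import math

FORCE, GRAVITY = 0.001, 0.0025
MIN_POSITION, MAX_POSITION, MAX_SPEED = -1.2, 0.6, 0.07

def transition(position, velocity, action):
    # action is 0 (left), 1 (don't accelerate) or 2 (right), so (action - 1) is the push direction
    velocity += (action - 1) * FORCE - math.cos(3 * position) * GRAVITY
    velocity = min(max(velocity, -MAX_SPEED), MAX_SPEED)
    position += velocity
    position = min(max(position, MIN_POSITION), MAX_POSITION)
    if position == MIN_POSITION and velocity < 0:
        velocity = 0.0  # inelastic collision with the left wall
    return position, velocity
```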
## Reward:
The goal is to reach the flag placed on top of the right hill as quickly as possible, as such the agent is
penalised with a reward of -1 for each timestep.
## Starting State
The position of the car is assigned a uniform random value in *[-0.6 , -0.4]*.
The starting velocity of the car is always assigned to 0.
## Episode End
The episode ends if either of the following happens:
1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on top of the right hill)
2. Truncation: The length of the episode is 200.
## Arguments
```python
import gymnasium as gym
gym.make('MountainCar-v0')
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
## Version History
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 30,
}
def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.5
self.goal_velocity = goal_velocity
self.force = 0.001
self.gravity = 0.0025
self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32)
self.high = np.array([self.max_position, self.max_speed], dtype=np.float32)
self.render_mode = render_mode
self.screen_width = 600
self.screen_height = 400
self.screen = None
self.clock = None
self.isopen = True
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
def step(self, action: int):
assert self.action_space.contains(
action
), f"{action!r} ({type(action)}) invalid"
position, velocity = self.state
velocity += (action - 1) * self.force + math.cos(3 * position) * (-self.gravity)
velocity = np.clip(velocity, -self.max_speed, self.max_speed)
position += velocity
position = np.clip(position, self.min_position, self.max_position)
if position == self.min_position and velocity < 0:
velocity = 0
terminated = bool(
position >= self.goal_position and velocity >= self.goal_velocity
)
reward = -1.0
self.state = (position, velocity)
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
self.state = np.array([self.np_random.uniform(low=low, high=high), 0])
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}
def _height(self, xs):
return np.sin(3 * xs) * 0.45 + 0.55
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.screen_width, self.screen_height)
)
else: # mode in "rgb_array"
self.screen = pygame.Surface((self.screen_width, self.screen_height))
if self.clock is None:
self.clock = pygame.time.Clock()
world_width = self.max_position - self.min_position
scale = self.screen_width / world_width
carwidth = 40
carheight = 20
self.surf = pygame.Surface((self.screen_width, self.screen_height))
self.surf.fill((255, 255, 255))
pos = self.state[0]
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs - self.min_position) * scale, ys * scale))
pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0))
clearance = 10
l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
coords = []
for c in [(l, b), (l, t), (r, t), (r, b)]:
c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
coords.append(
(
c[0] + (pos - self.min_position) * scale,
c[1] + clearance + self._height(pos) * scale,
)
)
gfxdraw.aapolygon(self.surf, coords, (0, 0, 0))
gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0))
for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]:
c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
wheel = (
int(c[0] + (pos - self.min_position) * scale),
int(c[1] + clearance + self._height(pos) * scale),
)
gfxdraw.aacircle(
self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
)
gfxdraw.filled_circle(
self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
)
flagx = int((self.goal_position - self.min_position) * scale)
flagy1 = int(self._height(self.goal_position) * scale)
flagy2 = flagy1 + 50
gfxdraw.vline(self.surf, flagx, flagy1, flagy2, (0, 0, 0))
gfxdraw.aapolygon(
self.surf,
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
(204, 204, 0),
)
gfxdraw.filled_polygon(
self.surf,
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
(204, 204, 0),
)
self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def get_keys_to_action(self):
# Control with left and right arrow keys.
return {(): 1, (276,): 0, (275,): 2, (275, 276): 1}
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False

View File

@@ -0,0 +1,277 @@
__credits__ = ["Carlos Luis"]
from os import path
from typing import Optional
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
DEFAULT_X = np.pi
DEFAULT_Y = 1.0
class PendulumEnv(gym.Env):
"""
## Description
The inverted pendulum swingup problem is based on the classic problem in control theory.
The system consists of a pendulum attached at one end to a fixed point, and the other end being free.
The pendulum starts in a random position and the goal is to apply torque on the free end to swing it
into an upright position, with its center of gravity right above the fixed point.
The diagram below specifies the coordinate system used for the implementation of the pendulum's
dynamic equations.
![Pendulum Coordinate System](/_static/diagrams/pendulum.png)
- `x-y`: cartesian coordinates of the pendulum's end in meters.
- `theta` : angle in radians.
- `tau`: torque in `N m`. Defined as positive _counter-clockwise_.
## Action Space
The action is a `ndarray` with shape `(1,)` representing the torque applied to the free end of the pendulum.
| Num | Action | Min | Max |
|-----|--------|------|-----|
| 0 | Torque | -2.0 | 2.0 |
## Observation Space
The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free
end and its angular velocity.
| Num | Observation | Min | Max |
|-----|------------------|------|-----|
| 0 | x = cos(theta) | -1.0 | 1.0 |
| 1 | y = sin(theta) | -1.0 | 1.0 |
| 2 | Angular Velocity | -8.0 | 8.0 |
## Rewards
The reward function is defined as:
*r = -(theta<sup>2</sup> + 0.1 * theta_dt<sup>2</sup> + 0.001 * torque<sup>2</sup>)*
where `theta` is the pendulum's angle normalized between *[-pi, pi]* (with 0 being in the upright position).
Based on the above equation, the minimum reward that can be obtained is
*-(pi<sup>2</sup> + 0.1 * 8<sup>2</sup> + 0.001 * 2<sup>2</sup>) = -16.2736044*,
while the maximum reward is zero (pendulum is upright with zero velocity and no torque applied).
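A sketch of the same reward computation, using the `angle_normalize` helper defined at the bottom of this file:
```python
import numpy as np

def angle_normalize(x):
    return ((x + np.pi) % (2 * np.pi)) - np.pi

def reward(theta, theta_dot, torque):
    # cost = theta^2 + 0.1 * theta_dot^2 + 0.001 * torque^2; the reward is its negative
    return -(angle_normalize(theta) ** 2 + 0.1 * theta_dot ** 2 + 0.001 * torque ** 2)
```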
## Starting State
The starting state is a random angle in *[-pi, pi]* and a random angular velocity in *[-1,1]*.
## Episode Truncation
The episode truncates at 200 time steps.
## Arguments
- `g`: acceleration of gravity measured in *(m s<sup>-2</sup>)* used to calculate the pendulum dynamics.
The default value is g = 10.0.
```python
import gymnasium as gym
gym.make('Pendulum-v1', g=9.81)
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
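For example, a sketch of a reset with custom bounds (the `x_init`/`y_init` keys are the ones read in `reset` below; the values are arbitrary):
```python
import gymnasium as gym
import numpy as np

env = gym.make('Pendulum-v1', g=9.81)
# Sample theta from [-pi/2, pi/2] and the angular velocity from [-0.5, 0.5]
observation, info = env.reset(seed=7, options={"x_init": np.pi / 2, "y_init": 0.5})
```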
## Version History
* v1: Simplify the math equations, no difference in behavior.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 30,
}
def __init__(self, render_mode: Optional[str] = None, g=10.0):
self.max_speed = 8
self.max_torque = 2.0
self.dt = 0.05
self.g = g
self.m = 1.0
self.l = 1.0
self.render_mode = render_mode
self.screen_dim = 500
self.screen = None
self.clock = None
self.isopen = True
high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
# This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
# or normalised as max_torque == 2 by default. Ignoring the issue here as the default settings are too old
# to update to follow the gymnasium api
self.action_space = spaces.Box(
low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
)
self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
def step(self, u):
th, thdot = self.state # th := theta
g = self.g
m = self.m
l = self.l
dt = self.dt
u = np.clip(u, -self.max_torque, self.max_torque)[0]
self.last_u = u # for rendering
costs = angle_normalize(th) ** 2 + 0.1 * thdot**2 + 0.001 * (u**2)
newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l**2) * u) * dt
newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
newth = th + newthdot * dt
self.state = np.array([newth, newthdot])
if self.render_mode == "human":
self.render()
return self._get_obs(), -costs, False, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed)
if options is None:
high = np.array([DEFAULT_X, DEFAULT_Y])
else:
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
x = options.get("x_init") if "x_init" in options else DEFAULT_X
y = options.get("y_init") if "y_init" in options else DEFAULT_Y
x = utils.verify_number_and_cast(x)
y = utils.verify_number_and_cast(y)
high = np.array([x, y])
low = -high # We enforce symmetric limits.
self.state = self.np_random.uniform(low=low, high=high)
self.last_u = None
if self.render_mode == "human":
self.render()
return self._get_obs(), {}
def _get_obs(self):
theta, thetadot = self.state
return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.screen_dim, self.screen_dim)
)
else: # mode in "rgb_array"
self.screen = pygame.Surface((self.screen_dim, self.screen_dim))
if self.clock is None:
self.clock = pygame.time.Clock()
self.surf = pygame.Surface((self.screen_dim, self.screen_dim))
self.surf.fill((255, 255, 255))
bound = 2.2
scale = self.screen_dim / (bound * 2)
offset = self.screen_dim // 2
rod_length = 1 * scale
rod_width = 0.2 * scale
l, r, t, b = 0, rod_length, rod_width / 2, -rod_width / 2
coords = [(l, b), (l, t), (r, t), (r, b)]
transformed_coords = []
for c in coords:
c = pygame.math.Vector2(c).rotate_rad(self.state[0] + np.pi / 2)
c = (c[0] + offset, c[1] + offset)
transformed_coords.append(c)
gfxdraw.aapolygon(self.surf, transformed_coords, (204, 77, 77))
gfxdraw.filled_polygon(self.surf, transformed_coords, (204, 77, 77))
gfxdraw.aacircle(self.surf, offset, offset, int(rod_width / 2), (204, 77, 77))
gfxdraw.filled_circle(
self.surf, offset, offset, int(rod_width / 2), (204, 77, 77)
)
rod_end = (rod_length, 0)
rod_end = pygame.math.Vector2(rod_end).rotate_rad(self.state[0] + np.pi / 2)
rod_end = (int(rod_end[0] + offset), int(rod_end[1] + offset))
gfxdraw.aacircle(
self.surf, rod_end[0], rod_end[1], int(rod_width / 2), (204, 77, 77)
)
gfxdraw.filled_circle(
self.surf, rod_end[0], rod_end[1], int(rod_width / 2), (204, 77, 77)
)
fname = path.join(path.dirname(__file__), "assets/clockwise.png")
img = pygame.image.load(fname)
if self.last_u is not None:
scale_img = pygame.transform.smoothscale(
img,
(scale * np.abs(self.last_u) / 2, scale * np.abs(self.last_u) / 2),
)
is_flip = bool(self.last_u > 0)
scale_img = pygame.transform.flip(scale_img, is_flip, True)
self.surf.blit(
scale_img,
(
offset - scale_img.get_rect().centerx,
offset - scale_img.get_rect().centery,
),
)
# drawing axle
gfxdraw.aacircle(self.surf, offset, offset, int(0.05 * scale), (0, 0, 0))
gfxdraw.filled_circle(self.surf, offset, offset, int(0.05 * scale), (0, 0, 0))
self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
else: # mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False
def angle_normalize(x):
return ((x + np.pi) % (2 * np.pi)) - np.pi

View File

@@ -0,0 +1,46 @@
"""
Utility functions used for classic control environments.
"""
from typing import Optional, SupportsFloat, Tuple
def verify_number_and_cast(x: SupportsFloat) -> float:
"""Verify parameter is a single number and cast to a float."""
try:
x = float(x)
except (ValueError, TypeError) as e:
raise ValueError(f"An option ({x}) could not be converted to a float.") from e
return x
def maybe_parse_reset_bounds(
options: Optional[dict], default_low: float, default_high: float
) -> Tuple[float, float]:
"""
This function can be called during a reset() to customize the sampling
ranges for setting the initial state distributions.
Args:
options: Options passed in to reset().
default_low: Default lower limit to use, if none specified in options.
default_high: Default upper limit to use, if none specified in options.
Returns:
Tuple of the lower and upper limits.
"""
if options is None:
return default_low, default_high
low = options.get("low") if "low" in options else default_low
high = options.get("high") if "high" in options else default_high
# We expect only numerical inputs.
low = verify_number_and_cast(low)
high = verify_number_and_cast(high)
if low > high:
raise ValueError(
f"Lower bound ({low}) must be lower than higher bound ({high})."
)
return low, high
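A short usage sketch of `maybe_parse_reset_bounds` (the numeric values are arbitrary):
```python
from gymnasium.envs.classic_control.utils import maybe_parse_reset_bounds

# Defaults are used when reset() receives no options
assert maybe_parse_reset_bounds(None, -0.05, 0.05) == (-0.05, 0.05)
# Callers can widen the initial-state distribution via reset options
assert maybe_parse_reset_bounds({"low": -0.1, "high": 0.1}, -0.05, 0.05) == (-0.1, 0.1)
# A partial dict falls back to the default for the missing key
assert maybe_parse_reset_bounds({"high": 0.2}, -0.05, 0.05) == (-0.05, 0.2)
```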