I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__init__.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__init__.py
@@ -0,0 +1,15 @@
+from gymnasium.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv  # isort:skip
+
+# ^^^^^ so that user gets the correct error
+# message if mujoco is not installed correctly
+from gymnasium.envs.mujoco.ant import AntEnv
+from gymnasium.envs.mujoco.half_cheetah import HalfCheetahEnv
+from gymnasium.envs.mujoco.hopper import HopperEnv
+from gymnasium.envs.mujoco.humanoid import HumanoidEnv
+from gymnasium.envs.mujoco.humanoidstandup import HumanoidStandupEnv
+from gymnasium.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
+from gymnasium.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
+from gymnasium.envs.mujoco.pusher import PusherEnv
+from gymnasium.envs.mujoco.reacher import ReacherEnv
+from gymnasium.envs.mujoco.swimmer import SwimmerEnv
+from gymnasium.envs.mujoco.walker2d import Walker2dEnv
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/__init__.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/__init__.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/ant.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/ant.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/ant_v3.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/ant_v3.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/ant_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/ant_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/half_cheetah.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/half_cheetah.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/half_cheetah_v3.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/half_cheetah_v3.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/half_cheetah_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/half_cheetah_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/hopper.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/hopper.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/hopper_v3.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/hopper_v3.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/hopper_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/hopper_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoid.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoid.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoid_v3.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoid_v3.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoid_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoid_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoidstandup.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoidstandup.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoidstandup_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/humanoidstandup_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/inverted_double_pendulum.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/inverted_double_pendulum.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/inverted_double_pendulum_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/inverted_double_pendulum_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/inverted_pendulum.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/inverted_pendulum.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/inverted_pendulum_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/inverted_pendulum_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/mujoco_env.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/mujoco_env.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/mujoco_rendering.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/mujoco_rendering.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/pusher.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/pusher.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/pusher_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/pusher_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/reacher.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/reacher.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/reacher_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/reacher_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/swimmer.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/swimmer.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/swimmer_v3.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/swimmer_v3.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/swimmer_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/swimmer_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/walker2d.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/walker2d.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/walker2d_v3.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/walker2d_v3.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/walker2d_v4.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/__pycache__/walker2d_v4.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/ant.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/ant.py
@@ -0,0 +1,80 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class AntEnv(MuJocoPyEnv, utils.EzPickle):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 20,
+    }
+
+    def __init__(self, **kwargs):
+        observation_space = Box(
+            low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
+        )
+        MuJocoPyEnv.__init__(
+            self, "ant.xml", 5, observation_space=observation_space, **kwargs
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def step(self, a):
+        xposbefore = self.get_body_com("torso")[0]
+        self.do_simulation(a, self.frame_skip)
+        xposafter = self.get_body_com("torso")[0]
+
+        forward_reward = (xposafter - xposbefore) / self.dt
+        ctrl_cost = 0.5 * np.square(a).sum()
+        contact_cost = (
+            0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
+        )
+        survive_reward = 1.0
+        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
+        state = self.state_vector()
+        not_terminated = (
+            np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
+        )
+        terminated = not not_terminated
+        ob = self._get_obs()
+
+        if self.render_mode == "human":
+            self.render()
+        return (
+            ob,
+            reward,
+            terminated,
+            False,
+            dict(
+                reward_forward=forward_reward,
+                reward_ctrl=-ctrl_cost,
+                reward_contact=-contact_cost,
+                reward_survive=survive_reward,
+            ),
+        )
+
+    def _get_obs(self):
+        return np.concatenate(
+            [
+                self.sim.data.qpos.flat[2:],
+                self.sim.data.qvel.flat,
+                np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
+            ]
+        )
+
+    def reset_model(self):
+        qpos = self.init_qpos + self.np_random.uniform(
+            size=self.model.nq, low=-0.1, high=0.1
+        )
+        qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        assert self.viewer is not None
+        self.viewer.cam.distance = self.model.stat.extent * 0.5
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/ant_v3.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/ant_v3.py
@@ -0,0 +1,187 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+DEFAULT_CAMERA_CONFIG = {
+    "distance": 4.0,
+}
+
+
+class AntEnv(MuJocoPyEnv, utils.EzPickle):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 20,
+    }
+
+    def __init__(
+        self,
+        xml_file="ant.xml",
+        ctrl_cost_weight=0.5,
+        contact_cost_weight=5e-4,
+        healthy_reward=1.0,
+        terminate_when_unhealthy=True,
+        healthy_z_range=(0.2, 1.0),
+        contact_force_range=(-1.0, 1.0),
+        reset_noise_scale=0.1,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        utils.EzPickle.__init__(
+            self,
+            xml_file,
+            ctrl_cost_weight,
+            contact_cost_weight,
+            healthy_reward,
+            terminate_when_unhealthy,
+            healthy_z_range,
+            contact_force_range,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        self._ctrl_cost_weight = ctrl_cost_weight
+        self._contact_cost_weight = contact_cost_weight
+
+        self._healthy_reward = healthy_reward
+        self._terminate_when_unhealthy = terminate_when_unhealthy
+        self._healthy_z_range = healthy_z_range
+
+        self._contact_force_range = contact_force_range
+
+        self._reset_noise_scale = reset_noise_scale
+
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        if exclude_current_positions_from_observation:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
+            )
+        else:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(113,), dtype=np.float64
+            )
+
+        MuJocoPyEnv.__init__(
+            self, xml_file, 5, observation_space=observation_space, **kwargs
+        )
+
+    @property
+    def healthy_reward(self):
+        return (
+            float(self.is_healthy or self._terminate_when_unhealthy)
+            * self._healthy_reward
+        )
+
+    def control_cost(self, action):
+        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
+        return control_cost
+
+    @property
+    def contact_forces(self):
+        raw_contact_forces = self.sim.data.cfrc_ext
+        min_value, max_value = self._contact_force_range
+        contact_forces = np.clip(raw_contact_forces, min_value, max_value)
+        return contact_forces
+
+    @property
+    def contact_cost(self):
+        contact_cost = self._contact_cost_weight * np.sum(
+            np.square(self.contact_forces)
+        )
+        return contact_cost
+
+    @property
+    def is_healthy(self):
+        state = self.state_vector()
+        min_z, max_z = self._healthy_z_range
+        is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
+        return is_healthy
+
+    @property
+    def terminated(self):
+        terminated = not self.is_healthy if self._terminate_when_unhealthy else False
+        return terminated
+
+    def step(self, action):
+        xy_position_before = self.get_body_com("torso")[:2].copy()
+        self.do_simulation(action, self.frame_skip)
+        xy_position_after = self.get_body_com("torso")[:2].copy()
+
+        xy_velocity = (xy_position_after - xy_position_before) / self.dt
+        x_velocity, y_velocity = xy_velocity
+
+        ctrl_cost = self.control_cost(action)
+        contact_cost = self.contact_cost
+
+        forward_reward = x_velocity
+        healthy_reward = self.healthy_reward
+
+        rewards = forward_reward + healthy_reward
+        costs = ctrl_cost + contact_cost
+
+        reward = rewards - costs
+        terminated = self.terminated
+        observation = self._get_obs()
+        info = {
+            "reward_forward": forward_reward,
+            "reward_ctrl": -ctrl_cost,
+            "reward_contact": -contact_cost,
+            "reward_survive": healthy_reward,
+            "x_position": xy_position_after[0],
+            "y_position": xy_position_after[1],
+            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
+            "x_velocity": x_velocity,
+            "y_velocity": y_velocity,
+            "forward_reward": forward_reward,
+        }
+
+        if self.render_mode == "human":
+            self.render()
+        return observation, reward, terminated, False, info
+
+    def _get_obs(self):
+        position = self.sim.data.qpos.flat.copy()
+        velocity = self.sim.data.qvel.flat.copy()
+        contact_force = self.contact_forces.flat.copy()
+
+        if self._exclude_current_positions_from_observation:
+            position = position[2:]
+
+        observations = np.concatenate((position, velocity, contact_force))
+
+        return observations
+
+    def reset_model(self):
+        noise_low = -self._reset_noise_scale
+        noise_high = self._reset_noise_scale
+
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nq
+        )
+        qvel = (
+            self.init_qvel
+            + self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
+        )
+        self.set_state(qpos, qvel)
+
+        observation = self._get_obs()
+
+        return observation
+
+    def viewer_setup(self):
+        assert self.viewer is not None
+        for key, value in DEFAULT_CAMERA_CONFIG.items():
+            if isinstance(value, np.ndarray):
+                getattr(self.viewer.cam, key)[:] = value
+            else:
+                setattr(self.viewer.cam, key, value)
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/ant_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/ant_v4.py
@@ -0,0 +1,379 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+DEFAULT_CAMERA_CONFIG = {
+    "distance": 4.0,
+}
+
+
+class AntEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+
+    This environment is based on the environment introduced by Schulman,
+    Moritz, Levine, Jordan and Abbeel in ["High-Dimensional Continuous Control
+    Using Generalized Advantage Estimation"](https://arxiv.org/abs/1506.02438).
+    The ant is a 3D robot consisting of one torso (free rotational body) with
+    four legs attached to it with each leg having two body parts. The goal is to
+    coordinate the four legs to move in the forward (right) direction by applying
+    torques on the eight hinges connecting the two body parts of each leg and the torso
+    (nine body parts and eight hinges).
+
+    ## Action Space
+    The action space is a `Box(-1, 1, (8,), float32)`. An action represents the torques applied at the hinge joints.
+
+    | Num | Action                                                            | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit         |
+    | --- | ----------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
+    | 0   | Torque applied on the rotor between the torso and back right hip  | -1          | 1           | hip_4 (right_back_leg)           | hinge | torque (N m) |
+    | 1   | Torque applied on the rotor between the back right two links      | -1          | 1           | ankle_4 (right_back_leg)         | hinge | torque (N m) |
+    | 2   | Torque applied on the rotor between the torso and front left hip  | -1          | 1           | hip_1 (front_left_leg)           | hinge | torque (N m) |
+    | 3   | Torque applied on the rotor between the front left two links      | -1          | 1           | ankle_1 (front_left_leg)         | hinge | torque (N m) |
+    | 4   | Torque applied on the rotor between the torso and front right hip | -1          | 1           | hip_2 (front_right_leg)          | hinge | torque (N m) |
+    | 5   | Torque applied on the rotor between the front right two links     | -1          | 1           | ankle_2 (front_right_leg)        | hinge | torque (N m) |
+    | 6   | Torque applied on the rotor between the torso and back left hip   | -1          | 1           | hip_3 (back_leg)                 | hinge | torque (N m) |
+    | 7   | Torque applied on the rotor between the back left two links       | -1          | 1           | ankle_3 (back_leg)               | hinge | torque (N m) |
+
+    ## Observation Space
+    Observations consist of positional values of different body parts of the ant,
+    followed by the velocities of those individual parts (their derivatives) with all
+    the positions ordered before all the velocities.
+
+    By default, observations do not include the x- and y-coordinates of the ant's torso. These may
+    be included by passing `exclude_current_positions_from_observation=False` during construction.
+    In that case, the observation space will be a `Box(-Inf, Inf, (29,), float64)` where the first two observations
+    represent the x- and y- coordinates of the ant's torso.
+    Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
+    of the torso will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
+
+    However, by default, the observation space is a `Box(-Inf, Inf, (27,), float64)` where the elements correspond to the following:
+
+    | Num | Observation                                                  | Min    | Max    | Name (in corresponding XML file)       | Joint | Unit                     |
+    |-----|--------------------------------------------------------------|--------|--------|----------------------------------------|-------|--------------------------|
+    | 0   | z-coordinate of the torso (centre)                           | -Inf   | Inf    | torso                                  | free  | position (m)             |
+    | 1   | x-orientation of the torso (centre)                          | -Inf   | Inf    | torso                                  | free  | angle (rad)              |
+    | 2   | y-orientation of the torso (centre)                          | -Inf   | Inf    | torso                                  | free  | angle (rad)              |
+    | 3   | z-orientation of the torso (centre)                          | -Inf   | Inf    | torso                                  | free  | angle (rad)              |
+    | 4   | w-orientation of the torso (centre)                          | -Inf   | Inf    | torso                                  | free  | angle (rad)              |
+    | 5   | angle between torso and first link on front left             | -Inf   | Inf    | hip_1 (front_left_leg)                 | hinge | angle (rad)              |
+    | 6   | angle between the two links on the front left                | -Inf   | Inf    | ankle_1 (front_left_leg)               | hinge | angle (rad)              |
+    | 7   | angle between torso and first link on front right            | -Inf   | Inf    | hip_2 (front_right_leg)                | hinge | angle (rad)              |
+    | 8   | angle between the two links on the front right               | -Inf   | Inf    | ankle_2 (front_right_leg)              | hinge | angle (rad)              |
+    | 9   | angle between torso and first link on back left              | -Inf   | Inf    | hip_3 (back_leg)                       | hinge | angle (rad)              |
+    | 10  | angle between the two links on the back left                 | -Inf   | Inf    | ankle_3 (back_leg)                     | hinge | angle (rad)              |
+    | 11  | angle between torso and first link on back right             | -Inf   | Inf    | hip_4 (right_back_leg)                 | hinge | angle (rad)              |
+    | 12  | angle between the two links on the back right                | -Inf   | Inf    | ankle_4 (right_back_leg)               | hinge | angle (rad)              |
+    | 13  | x-coordinate velocity of the torso                           | -Inf   | Inf    | torso                                  | free  | velocity (m/s)           |
+    | 14  | y-coordinate velocity of the torso                           | -Inf   | Inf    | torso                                  | free  | velocity (m/s)           |
+    | 15  | z-coordinate velocity of the torso                           | -Inf   | Inf    | torso                                  | free  | velocity (m/s)           |
+    | 16  | x-coordinate angular velocity of the torso                   | -Inf   | Inf    | torso                                  | free  | angular velocity (rad/s) |
+    | 17  | y-coordinate angular velocity of the torso                   | -Inf   | Inf    | torso                                  | free  | angular velocity (rad/s) |
+    | 18  | z-coordinate angular velocity of the torso                   | -Inf   | Inf    | torso                                  | free  | angular velocity (rad/s) |
+    | 19  | angular velocity of angle between torso and front left link  | -Inf   | Inf    | hip_1 (front_left_leg)                 | hinge | angular velocity (rad/s) |
+    | 20  | angular velocity of the angle between front left links       | -Inf   | Inf    | ankle_1 (front_left_leg)               | hinge | angular velocity (rad/s) |
+    | 21  | angular velocity of angle between torso and front right link | -Inf   | Inf    | hip_2 (front_right_leg)                | hinge | angular velocity (rad/s) |
+    | 22  | angular velocity of the angle between front right links      | -Inf   | Inf    | ankle_2 (front_right_leg)              | hinge | angular velocity (rad/s) |
+    | 23  | angular velocity of angle between torso and back left link   | -Inf   | Inf    | hip_3 (back_leg)                       | hinge | angular velocity (rad/s) |
+    | 24  | angular velocity of the angle between back left links        | -Inf   | Inf    | ankle_3 (back_leg)                     | hinge | angular velocity (rad/s) |
+    | 25  | angular velocity of angle between torso and back right link  | -Inf   | Inf    | hip_4 (right_back_leg)                 | hinge | angular velocity (rad/s) |
+    | 26  | angular velocity of the angle between back right links       | -Inf   | Inf    | ankle_4 (right_back_leg)               | hinge | angular velocity (rad/s) |
+    | excluded | x-coordinate of the torso (centre)                      | -Inf   | Inf    | torso                                  | free  | position (m)             |
+    | excluded | y-coordinate of the torso (centre)                      | -Inf   | Inf    | torso                                  | free  | position (m)             |
+
+
+    If version < `v4` or `use_contact_forces` is `True` then the observation space is extended by 14*6 = 84 elements, which are contact forces
+    (external forces - force x, y, z and torque x, y, z) applied to the
+    center of mass of each of the body parts. The 14 body parts are:
+
+    | id (for `v2`, `v3`, `v4`) | body parts |
+    |  ---  |  ------------  |
+    | 0  | worldbody (note: forces are always full of zeros) |
+    | 1  | torso |
+    | 2  | front_left_leg |
+    | 3  | aux_1 (front left leg) |
+    | 4  | ankle_1 (front left leg) |
+    | 5  | front_right_leg |
+    | 6  | aux_2 (front right leg) |
+    | 7  | ankle_2 (front right leg) |
+    | 8  | back_leg (back left leg) |
+    | 9  | aux_3 (back left leg) |
+    | 10 | ankle_3 (back left leg) |
+    | 11 | right_back_leg |
+    | 12 | aux_4 (back right leg) |
+    | 13 | ankle_4 (back right leg) |
+
+
+    The (x,y,z) coordinates are translational DOFs while the orientations are rotational
+    DOFs expressed as quaternions. One can read more about free joints on the [Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
+
+
+    **Note:** The Ant-v4 environment no longer has the following contact forces issue.
+    If using Ant versions prior to v4, there have been reported issues that using a Mujoco-Py version > 2.0 results
+    in the contact forces always being 0. As such we recommend using a Mujoco-Py version < 2.0
+    when using the Ant environment if you would like to report results with contact forces (if
+    contact forces are not used in your experiments, you can use version > 2.0).
+
+    ## Rewards
+    The reward consists of four parts:
+    - *healthy_reward*: Every timestep that the ant is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward`
+    - *forward_reward*: A reward of moving forward which is measured as
+    *(x-coordinate after action - x-coordinate before action)/dt*. *dt* is the time
+    between actions and is dependent on the `frame_skip` parameter (default is 5),
+    where the frametime is 0.01 - making the default *dt = 5 * 0.01 = 0.05*.
+    This reward would be positive if the ant moves forward (in positive x direction).
+    - *ctrl_cost*: A negative reward for penalising the ant if it takes actions
+    that are too large. It is measured as *`ctrl_cost_weight` * sum(action<sup>2</sup>)*
+    where *`ctrl_cost_weight`* is a parameter set for the control and has a default value of 0.5.
+    - *contact_cost*: A negative reward for penalising the ant if the external contact
+    force is too large. It is calculated as *`contact_cost_weight` * sum(clip(external contact
+    force to `contact_force_range`)<sup>2</sup>)*.
+
+    The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost*.
+
+    But if `use_contact_forces=True` or version < `v4`,
+    the total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost*.
+
+    In either case `info` will also contain the individual reward terms.
+
+    ## Starting State
+    All observations start in state
+    (0.0, 0.0,  0.75, 1.0, 0.0  ... 0.0) with a uniform noise in the range
+    of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional values and Gaussian noise
+    with mean 0 and standard deviation `reset_noise_scale` added to the velocity values for
+    stochasticity. Note that the initial z coordinate is intentionally selected
+    to be slightly high, thereby indicating a standing up ant. The initial orientation
+    is designed to make it face forward as well.
+
+    ## Episode End
+    The ant is said to be unhealthy if any of the following happens:
+
+    1. Any of the state space values is no longer finite
+    2. The z-coordinate of the torso is **not** in the closed interval given by `healthy_z_range` (defaults to [0.2, 1.0])
+
+    If `terminate_when_unhealthy=True` is passed during construction (which is the default),
+    the episode ends when any of the following happens:
+
+    1. Truncation: The episode duration reaches 1000 timesteps
+    2. Termination: The ant is unhealthy
+
+    If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
+
+    ## Arguments
+
+    No additional arguments are currently supported in v2 and lower.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Ant-v2')
+    ```
+
+    v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Ant-v4', ctrl_cost_weight=0.1, ...)
+    ```
+
+    | Parameter               | Type       | Default      |Description                    |
+    |-------------------------|------------|--------------|-------------------------------|
+    | `xml_file`              | **str**    | `"ant.xml"`  | Path to a MuJoCo model |
+    | `ctrl_cost_weight`      | **float**  | `0.5`        | Weight for *ctrl_cost* term (see section on reward) |
+    | `use_contact_forces`    | **bool**  | `False`      | If true, it extends the observation space by adding contact forces (see `Observation Space` section) and includes contact_cost to the reward function (see `Rewards` section) |
+    | `contact_cost_weight`   | **float**  | `5e-4`       | Weight for *contact_cost* term (see section on reward) |
+    | `healthy_reward`        | **float**  | `1`          | Constant reward given if the ant is "healthy" after timestep |
+    | `terminate_when_unhealthy` | **bool**| `True`       | If true, issue a done signal if the z-coordinate of the torso is no longer in the `healthy_z_range` |
+    | `healthy_z_range`       | **tuple**  | `(0.2, 1)`   | The ant is considered healthy if the z-coordinate of the torso is in this range |
+    | `contact_force_range`   | **tuple**  | `(-1, 1)`    | Contact forces are clipped to this range in the computation of *contact_cost* |
+    | `reset_noise_scale`     | **float**  | `0.1`        | Scale of random perturbations of initial position and velocity (see section on Starting State) |
+    | `exclude_current_positions_from_observation`| **bool** | `True`| Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+    ## Version History
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3, also removed contact forces from the default observation space (new variable `use_contact_forces=True` can restore them)
+    * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+    * v0: Initial versions release (1.0.0)
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 20,
+    }
+
+    def __init__(
+        self,
+        xml_file="ant.xml",
+        ctrl_cost_weight=0.5,
+        use_contact_forces=False,
+        contact_cost_weight=5e-4,
+        healthy_reward=1.0,
+        terminate_when_unhealthy=True,
+        healthy_z_range=(0.2, 1.0),
+        contact_force_range=(-1.0, 1.0),
+        reset_noise_scale=0.1,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        utils.EzPickle.__init__(
+            self,
+            xml_file,
+            ctrl_cost_weight,
+            use_contact_forces,
+            contact_cost_weight,
+            healthy_reward,
+            terminate_when_unhealthy,
+            healthy_z_range,
+            contact_force_range,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        self._ctrl_cost_weight = ctrl_cost_weight
+        self._contact_cost_weight = contact_cost_weight
+
+        self._healthy_reward = healthy_reward
+        self._terminate_when_unhealthy = terminate_when_unhealthy
+        self._healthy_z_range = healthy_z_range
+
+        self._contact_force_range = contact_force_range
+
+        self._reset_noise_scale = reset_noise_scale
+
+        self._use_contact_forces = use_contact_forces
+
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        obs_shape = 27
+        if not exclude_current_positions_from_observation:
+            obs_shape += 2
+        if use_contact_forces:
+            obs_shape += 84
+
+        observation_space = Box(
+            low=-np.inf, high=np.inf, shape=(obs_shape,), dtype=np.float64
+        )
+
+        MujocoEnv.__init__(
+            self,
+            xml_file,
+            5,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+
+    @property
+    def healthy_reward(self):
+        return (
+            float(self.is_healthy or self._terminate_when_unhealthy)
+            * self._healthy_reward
+        )
+
+    def control_cost(self, action):
+        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
+        return control_cost
+
+    @property
+    def contact_forces(self):
+        raw_contact_forces = self.data.cfrc_ext
+        min_value, max_value = self._contact_force_range
+        contact_forces = np.clip(raw_contact_forces, min_value, max_value)
+        return contact_forces
+
+    @property
+    def contact_cost(self):
+        contact_cost = self._contact_cost_weight * np.sum(
+            np.square(self.contact_forces)
+        )
+        return contact_cost
+
+    @property
+    def is_healthy(self):
+        state = self.state_vector()
+        min_z, max_z = self._healthy_z_range
+        is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
+        return is_healthy
+
+    @property
+    def terminated(self):
+        terminated = not self.is_healthy if self._terminate_when_unhealthy else False
+        return terminated
+
+    def step(self, action):
+        xy_position_before = self.get_body_com("torso")[:2].copy()
+        self.do_simulation(action, self.frame_skip)
+        xy_position_after = self.get_body_com("torso")[:2].copy()
+
+        xy_velocity = (xy_position_after - xy_position_before) / self.dt
+        x_velocity, y_velocity = xy_velocity
+
+        forward_reward = x_velocity
+        healthy_reward = self.healthy_reward
+
+        rewards = forward_reward + healthy_reward
+
+        costs = ctrl_cost = self.control_cost(action)
+
+        terminated = self.terminated
+        observation = self._get_obs()
+        info = {
+            "reward_forward": forward_reward,
+            "reward_ctrl": -ctrl_cost,
+            "reward_survive": healthy_reward,
+            "x_position": xy_position_after[0],
+            "y_position": xy_position_after[1],
+            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
+            "x_velocity": x_velocity,
+            "y_velocity": y_velocity,
+            "forward_reward": forward_reward,
+        }
+        if self._use_contact_forces:
+            contact_cost = self.contact_cost
+            costs += contact_cost
+            info["reward_ctrl"] = -contact_cost
+
+        reward = rewards - costs
+
+        if self.render_mode == "human":
+            self.render()
+        return observation, reward, terminated, False, info
+
+    def _get_obs(self):
+        position = self.data.qpos.flat.copy()
+        velocity = self.data.qvel.flat.copy()
+
+        if self._exclude_current_positions_from_observation:
+            position = position[2:]
+
+        if self._use_contact_forces:
+            contact_force = self.contact_forces.flat.copy()
+            return np.concatenate((position, velocity, contact_force))
+        else:
+            return np.concatenate((position, velocity))
+
+    def reset_model(self):
+        noise_low = -self._reset_noise_scale
+        noise_high = self._reset_noise_scale
+
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nq
+        )
+        qvel = (
+            self.init_qvel
+            + self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
+        )
+        self.set_state(qpos, qvel)
+
+        observation = self._get_obs()
+
+        return observation
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/ant.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/ant.xml
@ -0,0 +1,81 @@
+<mujoco model="ant">
+  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
+  <option integrator="RK4" timestep="0.01"/>
+  <custom>
+    <numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
+  </custom>
+  <default>
+    <joint armature="1" damping="1" limited="true"/>
+    <geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
+  </default>
+  <asset>
+    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
+    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+    <material name="geom" texture="texgeom" texuniform="true"/>
+  </asset>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
+    <body name="torso" pos="0 0 0.75">
+      <camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
+      <geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
+      <joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
+      <body name="front_left_leg" pos="0 0 0">
+        <geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
+        <body name="aux_1" pos="0.2 0.2 0">
+          <joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
+          <geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
+          <body pos="0.2 0.2 0">
+            <joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
+            <geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <body name="front_right_leg" pos="0 0 0">
+        <geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
+        <body name="aux_2" pos="-0.2 0.2 0">
+          <joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
+          <geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
+          <body pos="-0.2 0.2 0">
+            <joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
+            <geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <body name="back_leg" pos="0 0 0">
+        <geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
+        <body name="aux_3" pos="-0.2 -0.2 0">
+          <joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
+          <geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
+          <body pos="-0.2 -0.2 0">
+            <joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
+            <geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <body name="right_back_leg" pos="0 0 0">
+        <geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
+        <body name="aux_4" pos="0.2 -0.2 0">
+          <joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
+          <geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
+          <body pos="0.2 -0.2 0">
+            <joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
+            <geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <!-- Eight motors, one per hip and ankle hinge joint, each with control
+       range [-1, 1] and gear 150. Note that the listing starts with leg 4
+       (hip_4, ankle_4) and then continues with legs 1 to 3; presumably the
+       action vector follows this order (confirm against the environment
+       documentation). -->
+  <actuator>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
+  </actuator>
+</mujoco>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/half_cheetah.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/half_cheetah.xml
@ -0,0 +1,96 @@
+<!-- Cheetah Model
+
+    The state space is populated with joints in the order that they are
+    defined in this file. The actuators also operate on joints.
+
+    State-Space (name/joint/parameter):
+        - rootx     slider      position (m)
+        - rootz     slider      position (m)
+        - rooty     hinge       angle (rad)
+        - bthigh    hinge       angle (rad)
+        - bshin     hinge       angle (rad)
+        - bfoot     hinge       angle (rad)
+        - fthigh    hinge       angle (rad)
+        - fshin     hinge       angle (rad)
+        - ffoot     hinge       angle (rad)
+        - rootx     slider      velocity (m/s)
+        - rootz     slider      velocity (m/s)
+        - rooty     hinge       angular velocity (rad/s)
+        - bthigh    hinge       angular velocity (rad/s)
+        - bshin     hinge       angular velocity (rad/s)
+        - bfoot     hinge       angular velocity (rad/s)
+        - fthigh    hinge       angular velocity (rad/s)
+        - fshin     hinge       angular velocity (rad/s)
+        - ffoot     hinge       angular velocity (rad/s)
+
+    Actuators (name/actuator/parameter):
+        - bthigh    hinge       torque (N m)
+        - bshin     hinge       torque (N m)
+        - bfoot     hinge       torque (N m)
+        - fthigh    hinge       torque (N m)
+        - fshin     hinge       torque (N m)
+        - ffoot     hinge       torque (N m)
+
+-->
+<mujoco model="cheetah">
+  <compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
+  <default>
+    <joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
+    <geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
+    <motor ctrllimited="true" ctrlrange="-1 1"/>
+  </default>
+  <size nstack="300000" nuser_geom="1"/>
+  <option gravity="0 0 -9.81" timestep="0.01"/>
+  <asset>
+    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
+    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+    <material name="geom" texture="texgeom" texuniform="true"/>
+  </asset>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
+    <body name="torso" pos="0 0 .7">
+      <camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
+      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
+      <geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
+      <geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
+      <!-- <site name='tip'  pos='.15 0 .11'/>-->
+      <body name="bthigh" pos="-.5 0 0">
+        <joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
+        <geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
+        <body name="bshin" pos=".16 0 -.25">
+          <joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
+          <geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
+          <body name="bfoot" pos="-.28 0 -.14">
+            <joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
+            <geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <body name="fthigh" pos=".5 0 0">
+        <joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
+        <geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
+        <body name="fshin" pos="-.14 0 -.24">
+          <joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
+          <geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
+          <body name="ffoot" pos=".13 0 -.18">
+            <joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
+            <geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <!-- Six named motors in the same order as the actuator list in the file
+       header: back thigh, shin, foot, then front thigh, shin, foot. No
+       control range is given here; presumably the motor default declared
+       in the default section (ctrllimited, range -1 to 1) applies. Gear
+       values differ per joint. -->
+  <actuator>
+    <motor gear="120" joint="bthigh" name="bthigh"/>
+    <motor gear="90" joint="bshin" name="bshin"/>
+    <motor gear="60" joint="bfoot" name="bfoot"/>
+    <motor gear="120" joint="fthigh" name="fthigh"/>
+    <motor gear="60" joint="fshin" name="fshin"/>
+    <motor gear="30" joint="ffoot" name="ffoot"/>
+  </actuator>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/hopper.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/hopper.xml
@ -0,0 +1,53 @@
+<!--
+Hopper model for `Hopper-v5`, based on openai/gym/Walker2d
+modified by @saran_t
+  - To not require `coordinate="global"`
+-->
+<mujoco model="hopper">
+  <compiler angle="degree" inertiafromgeom="true"/>
+  <default>
+    <joint armature="1" damping="1" limited="true"/>
+    <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
+    <motor ctrllimited="true" ctrlrange="-.4 .4"/>
+  </default>
+  <option integrator="RK4" timestep="0.002"/>
+  <visual>
+    <map znear="0.02"/>
+  </visual>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
+    <body name="torso" pos="0 0 1.25">
+      <camera name="track" mode="trackcom" pos="0 -3 -0.25" xyaxes="1 0 0 0 0 1"/>
+      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 -1.25" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 -1.25" ref="1.25" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
+      <geom friction="0.9" name="torso_geom" size="0.05 0.19999999999999996" type="capsule"/>
+      <body name="thigh" pos="0 0 -0.19999999999999996">
+        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 0" range="-150 0" type="hinge"/>
+        <geom friction="0.9" pos="0 0 -0.22500000000000009" name="thigh_geom" size="0.05 0.22500000000000003" type="capsule"/>
+        <body name="leg" pos="0 0 -0.70000000000000007">
+          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.25" range="-150 0" type="hinge"/>
+          <geom friction="0.9" name="leg_geom" size="0.04 0.25" type="capsule"/>
+          <body name="foot" pos="0.13 0 -0.35">
+            <joint axis="0 -1 0" name="foot_joint" pos="-0.13 0 0.1" range="-45 45" type="hinge"/>
+            <geom friction="2.0" pos="-0.065 0 0.1" quat="0.70710678118654757 0 -0.70710678118654746 0" name="foot_geom" size="0.06 0.195" type="capsule"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <!-- Three motors for the thigh, leg and foot hinges, all with gear 200.
+       Each sets its own control range of -1.0 to 1.0, which differs from the
+       motor default of -.4 to .4 declared in the default section; presumably
+       the explicit attribute here takes precedence. -->
+  <actuator>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
+  </actuator>
+    <asset>
+        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
+            width="100" height="100"/>
+        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+        <material name="geom" texture="texgeom" texuniform="true"/>
+    </asset>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/humanoid.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/humanoid.xml
@ -0,0 +1,121 @@
+<mujoco model="humanoid">
+    <compiler angle="degree" inertiafromgeom="true"/>
+    <default>
+        <joint armature="1" damping="1" limited="true"/>
+        <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
+        <motor ctrllimited="true" ctrlrange="-.4 .4"/>
+    </default>
+    <option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
+        <!-- <flags solverstat="enable" energy="enable"/>-->
+    </option>
+    <size nkey="5" nuser_geom="1"/>
+    <visual>
+        <map fogend="5" fogstart="3"/>
+    </visual>
+    <asset>
+        <texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
+        <!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
+        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+        <material name="geom" texture="texgeom" texuniform="true"/>
+    </asset>
+    <worldbody>
+        <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+        <geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
+        <!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
+        <body name="torso" pos="0 0 1.4">
+            <camera name="track" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1"/>
+            <joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
+            <geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
+            <geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258"/>
+            <geom fromto="-.01 -.06 -.12 -.01 .06 -.12" name="uwaist" size="0.06" type="capsule"/>
+            <body name="lwaist" pos="-.01 0 -0.260" quat="1.000 0 -0.002 0">
+                <geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
+                <joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
+                <joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
+                <body name="pelvis" pos="0 0 -0.165" quat="1.000 0 -0.002 0">
+                    <joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
+                    <geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
+                    <body name="right_thigh" pos="0 -0.1 -0.04">
+                        <joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
+                        <joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
+                        <geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule"/>
+                        <body name="right_shin" pos="0 0.01 -0.403">
+                            <joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
+                            <geom fromto="0 0 0 0 0 -.3" name="right_shin1" size="0.049" type="capsule"/>
+                            <body name="right_foot" pos="0 0 -0.45">
+                                <geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
+                            </body>
+                        </body>
+                    </body>
+                    <body name="left_thigh" pos="0 0.1 -0.04">
+                        <joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
+                        <geom fromto="0 0 0 0 -0.01 -.34" name="left_thigh1" size="0.06" type="capsule"/>
+                        <body name="left_shin" pos="0 -0.01 -0.403">
+                            <joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
+                            <geom fromto="0 0 0 0 0 -.3" name="left_shin1" size="0.049" type="capsule"/>
+                            <body name="left_foot" pos="0 0 -0.45">
+                                <geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+            <body name="right_upper_arm" pos="0 -0.17 0.06">
+                <joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
+                <joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
+                <geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
+                <body name="right_lower_arm" pos=".18 -.18 -.18">
+                    <joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
+                    <geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
+                    <geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
+                    <camera pos="0 0 0"/>
+                </body>
+            </body>
+            <body name="left_upper_arm" pos="0 0.17 0.06">
+                <joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
+                <joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
+                <geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
+                <body name="left_lower_arm" pos=".18 .18 -.18">
+                    <joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
+                    <geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
+                    <geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
+                </body>
+            </body>
+        </body>
+    </worldbody>
+    <tendon>
+        <fixed name="left_hipknee">
+            <joint coef="-1" joint="left_hip_y"/>
+            <joint coef="1" joint="left_knee"/>
+        </fixed>
+        <fixed name="right_hipknee">
+            <joint coef="-1" joint="right_hip_y"/>
+            <joint coef="1" joint="right_knee"/>
+        </fixed>
+    </tendon>
+
+    <!-- Seventeen named motors: three abdomen joints (listed y, z, x), four
+         per leg (hip x, z, y and knee; right leg first), and three per arm
+         (two shoulder joints and elbow; right arm first). No control range
+         is set here; presumably the motor default from the default section
+         (ctrllimited, range -.4 to .4) applies. Gear ranges from 25 on the
+         arms to 300 on hip_y. The fixed tendons defined above are not
+         referenced by any motor in this list. -->
+    <actuator>
+        <motor gear="100" joint="abdomen_y" name="abdomen_y"/>
+        <motor gear="100" joint="abdomen_z" name="abdomen_z"/>
+        <motor gear="100" joint="abdomen_x" name="abdomen_x"/>
+        <motor gear="100" joint="right_hip_x" name="right_hip_x"/>
+        <motor gear="100" joint="right_hip_z" name="right_hip_z"/>
+        <motor gear="300" joint="right_hip_y" name="right_hip_y"/>
+        <motor gear="200" joint="right_knee" name="right_knee"/>
+        <motor gear="100" joint="left_hip_x" name="left_hip_x"/>
+        <motor gear="100" joint="left_hip_z" name="left_hip_z"/>
+        <motor gear="300" joint="left_hip_y" name="left_hip_y"/>
+        <motor gear="200" joint="left_knee" name="left_knee"/>
+        <motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
+        <motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
+        <motor gear="25" joint="right_elbow" name="right_elbow"/>
+        <motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
+        <motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
+        <motor gear="25" joint="left_elbow" name="left_elbow"/>
+    </actuator>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/humanoidstandup.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/humanoidstandup.xml
@ -0,0 +1,121 @@
+<mujoco model="humanoidstandup">
+    <compiler angle="degree" inertiafromgeom="true"/>
+    <default>
+        <joint armature="1" damping="1" limited="true"/>
+        <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
+        <motor ctrllimited="true" ctrlrange="-.4 .4"/>
+    </default>
+    <option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
+        <!-- <flags solverstat="enable" energy="enable"/>-->
+    </option>
+    <size nkey="5" nuser_geom="1"/>
+    <visual>
+        <map fogend="5" fogstart="3"/>
+    </visual>
+    <asset>
+        <texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
+        <!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
+        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+        <material name="geom" texture="texgeom" texuniform="true"/>
+    </asset>
+    <worldbody>
+        <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+        <geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
+        <!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
+        <body name="torso" pos="0 0 .105">
+            <camera name="track" mode="trackcom" pos="0 -3 .5" xyaxes="1 0 0 0 0 1"/>
+            <joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
+            <geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
+            <geom name="head" pos="-.15 0 0" size=".09" type="sphere" user="258"/>
+            <geom fromto=".11 -.06 0 .11 .06 0" name="uwaist" size="0.06" type="capsule"/>
+            <body name="lwaist" pos=".21 0 0" quat="1.000 0 -0.002 0">
+                <geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
+                <joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
+                <joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
+                <body name="pelvis" pos="0.165 0 0" quat="1.000 0 -0.002 0">
+                    <joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
+                    <geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
+                    <body name="right_thigh" pos="0 -0.1 0">
+                        <joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
+                        <joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
+                        <geom fromto="0 0 0 0.34 0.01 0" name="right_thigh1" size="0.06" type="capsule"/>
+                        <body name="right_shin" pos="0.403 0.01 0">
+                            <joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
+                            <geom fromto="0 0 0 0.3 0 0" name="right_shin1" size="0.049" type="capsule"/>
+                            <body name="right_foot" pos="0.35 0 -.10">
+                                <geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
+                            </body>
+                        </body>
+                    </body>
+                    <body name="left_thigh" pos="0 0.1 0">
+                        <joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
+                        <geom fromto="0 0 0 0.34 -0.01 0" name="left_thigh1" size="0.06" type="capsule"/>
+                        <body name="left_shin" pos="0.403 -0.01 0">
+                            <joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
+                            <geom fromto="0 0 0 0.3 0 0" name="left_shin1" size="0.049" type="capsule"/>
+                            <body name="left_foot" pos="0.35 0 -.1">
+                                <geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+            <body name="right_upper_arm" pos="0 -0.17 0.06">
+                <joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
+                <joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
+                <geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
+                <body name="right_lower_arm" pos=".18 -.18 -.18">
+                    <joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
+                    <geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
+                    <geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
+                    <camera pos="0 0 0"/>
+                </body>
+            </body>
+            <body name="left_upper_arm" pos="0 0.17 0.06">
+                <joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
+                <joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
+                <geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
+                <body name="left_lower_arm" pos=".18 .18 -.18">
+                    <joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
+                    <geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
+                    <geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
+                </body>
+            </body>
+        </body>
+    </worldbody>
+    <tendon>
+        <fixed name="left_hipknee">
+            <joint coef="-1" joint="left_hip_y"/>
+            <joint coef="1" joint="left_knee"/>
+        </fixed>
+        <fixed name="right_hipknee">
+            <joint coef="-1" joint="right_hip_y"/>
+            <joint coef="1" joint="right_knee"/>
+        </fixed>
+    </tendon>
+
+    <actuator>
+        <motor gear="100" joint="abdomen_y" name="abdomen_y"/>
+        <motor gear="100" joint="abdomen_z" name="abdomen_z"/>
+        <motor gear="100" joint="abdomen_x" name="abdomen_x"/>
+        <motor gear="100" joint="right_hip_x" name="right_hip_x"/>
+        <motor gear="100" joint="right_hip_z" name="right_hip_z"/>
+        <motor gear="300" joint="right_hip_y" name="right_hip_y"/>
+        <motor gear="200" joint="right_knee" name="right_knee"/>
+        <motor gear="100" joint="left_hip_x" name="left_hip_x"/>
+        <motor gear="100" joint="left_hip_z" name="left_hip_z"/>
+        <motor gear="300" joint="left_hip_y" name="left_hip_y"/>
+        <motor gear="200" joint="left_knee" name="left_knee"/>
+        <motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
+        <motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
+        <motor gear="25" joint="right_elbow" name="right_elbow"/>
+        <motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
+        <motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
+        <motor gear="25" joint="left_elbow" name="left_elbow"/>
+    </actuator>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/inverted_double_pendulum.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/inverted_double_pendulum.xml
@ -0,0 +1,47 @@
+<!-- Cartpole Model (double pendulum)
+
+    The state space is populated with joints in the order that they are
+    defined in this file. The actuators also operate on joints.
+
+    State-Space (name/joint/parameter):
+        - cart      slider      position (m)
+        - pole      hinge       angle (rad)
+        - pole2     hinge2      angle (rad)
+        - cart      slider      velocity (m/s)
+        - pole      hinge       angular velocity (rad/s)
+        - pole2     hinge2      angular velocity (rad/s)
+
+    Actuators (name/actuator/parameter):
+        - cart      motor       force x (N)
+
+-->
+<mujoco model="cartpole">
+  <compiler coordinate="local" inertiafromgeom="true"/>
+  <custom>
+    <numeric data="2" name="frame_skip"/>
+  </custom>
+  <default>
+    <joint damping="0.05"/>
+    <geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
+  </default>
+  <option gravity="1e-5 0 -9.81" integrator="RK4" timestep="0.01"/>
+  <size nstack="3000"/>
+  <worldbody>
+    <geom name="floor" pos="0 0 -3.0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
+    <geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
+    <body name="cart" pos="0 0 0">
+      <joint axis="1 0 0" limited="true" margin="0.01" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
+      <geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
+      <body name="pole" pos="0 0 0">
+        <joint axis="0 1 0" name="hinge" pos="0 0 0" type="hinge"/>
+        <geom fromto="0 0 0 0 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
+        <body name="pole2" pos="0 0 0.6">
+          <joint axis="0 1 0" name="hinge2" pos="0 0 0" type="hinge"/>
+          <geom fromto="0 0 0 0 0 0.6" name="cpole2" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
+          <site name="tip" pos="0 0 .6" size="0.01 0.01"/>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <actuator>
+    <motor ctrllimited="true" ctrlrange="-1 1" gear="500" joint="slider" name="slide"/>
+  </actuator>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/inverted_pendulum.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/inverted_pendulum.xml
@ -0,0 +1,27 @@
+<mujoco model="inverted pendulum">
+	<compiler inertiafromgeom="true"/>
+	<default>
+		<joint armature="0" damping="1" limited="true"/>
+		<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
+		<tendon/>
+		<motor ctrlrange="-3 3"/>
+	</default>
+	<option gravity="0 0 -9.81" integrator="RK4" timestep="0.02"/>
+	<size nstack="3000"/>
+	<worldbody>
+		<!--geom name="ground" type="plane" pos="0 0 0" /-->
+		<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
+		<body name="cart" pos="0 0 0">
+			<joint axis="1 0 0" limited="true" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
+			<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
+			<body name="pole" pos="0 0 0">
+				<joint axis="0 1 0" name="hinge" pos="0 0 0" range="-90 90" type="hinge"/>
+				<geom fromto="0 0 0 0.001 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.049 0.3" type="capsule"/>
+				<!--                 <body name="pole2" pos="0.001 0 0.6"><joint name="hinge2" type="hinge" pos="0 0 0" axis="0 1 0"/><geom name="cpole2" type="capsule" fromto="0 0 0 0 0 0.6" size="0.05 0.3" rgba="0.7 0 0.7 1"/><site name="tip2" pos="0 0 .6"/></body>-->
+			</body>
+		</body>
+	</worldbody>
+	<actuator>
+		<motor ctrllimited="true" ctrlrange="-3 3" gear="100" joint="slider" name="slide"/>
+	</actuator>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/point.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/point.xml
@ -0,0 +1,31 @@
+<mujoco>
+  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
+  <option integrator="RK4" timestep="0.02"/>
+  <default>
+    <joint armature="0" damping="0" limited="false"/>
+    <geom conaffinity="0" condim="3" density="100" friction="1 0.5 0.5" margin="0" rgba="0.8 0.6 0.4 1"/>
+  </default>
+  <asset>
+    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
+    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
+    <material name="geom" texture="texgeom" texuniform="true"/>
+  </asset>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
+    <body name="torso" pos="0 0 0">
+      <geom name="pointbody" pos="0 0 0.5" size="0.5" type="sphere"/>
+      <geom name="pointarrow" pos="0.6 0 0.5" size="0.5 0.1 0.1" type="box"/>
+      <joint axis="1 0 0" name="ballx" pos="0 0 0" type="slide"/>
+      <joint axis="0 1 0" name="bally" pos="0 0 0" type="slide"/>
+      <joint axis="0 0 1" limited="false" name="rot" pos="0 0 0" type="hinge"/>
+    </body>
+  </worldbody>
+  <actuator>
+    <!-- These are just dummy actuators for providing ranges -->
+    <motor ctrllimited="true" ctrlrange="-1 1" joint="ballx"/>
+    <motor ctrllimited="true" ctrlrange="-0.25 0.25" joint="rot"/>
+  </actuator>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/pusher.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/pusher.xml
@ -0,0 +1,91 @@
+<mujoco model="arm3d">
+  <compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
+  <option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler" />
+
+  <default>
+    <joint armature='0.04' damping="1" limited="true"/>
+    <geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
+  </default>
+
+  <worldbody>
+    <light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
+    <geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
+
+    <body name="r_shoulder_pan_link" pos="0 -0.6 0">
+      <geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" />
+      <geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" />
+      <geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" />
+      <geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" />
+      <geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
+      <joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
+
+      <body name="r_shoulder_lift_link" pos="0.1 0 0">
+        <geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
+        <joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
+
+        <body name="r_upper_arm_roll_link" pos="0 0 0">
+          <geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
+          <joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
+
+          <body name="r_upper_arm_link" pos="0 0 0">
+            <geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
+
+            <body name="r_elbow_flex_link" pos="0.4 0 0">
+              <geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
+              <joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
+
+              <body name="r_forearm_roll_link" pos="0 0 0">
+                <geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
+                <joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
+
+                <body name="r_forearm_link" pos="0 0 0">
+                  <geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
+
+                  <body name="r_wrist_flex_link" pos="0.321 0 0">
+                    <geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
+                    <joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
+
+                    <body name="r_wrist_roll_link" pos="0 0 0">
+                      <joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
+                      <body name="tips_arm" pos="0 0 0">
+                        <geom name="tip_arml" type="sphere" pos="0.1 -0.1 0." size="0.01" />
+                        <geom name="tip_armr" type="sphere" pos="0.1 0.1 0." size="0.01" />
+                      </body>
+                      <geom type="capsule" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" contype="1" conaffinity="1" />
+                      <geom type="capsule" fromto="0 -0.1 0. 0.1 -0.1 0" size="0.02" contype="1" conaffinity="1" />
+                      <geom type="capsule" fromto="0 +0.1 0. 0.1 +0.1 0." size="0.02" contype="1" conaffinity="1" />
+                    </body>
+                  </body>
+                </body>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+
+    <!--<body name="object" pos="0.55 -0.3 -0.275" >-->
+    <body name="object" pos="0.45 -0.05 -0.275" >
+      <geom rgba="1 1 1 0" type="sphere" size="0.05 0.05 0.05" density="0.00001" conaffinity="0"/>
+      <geom rgba="1 1 1 1" type="cylinder" size="0.05 0.05 0.05" density="0.00001" contype="1" conaffinity="0"/>
+      <joint name="obj_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
+      <joint name="obj_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
+    </body>
+
+    <body name="goal" pos="0.45 -0.05 -0.3230">
+      <geom rgba="1 0 0 1" type="cylinder" size="0.08 0.001 0.1" density='0.00001' contype="0" conaffinity="0"/>
+      <joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
+      <joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor joint="r_shoulder_pan_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_shoulder_lift_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_upper_arm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_elbow_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_forearm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_wrist_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_wrist_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true"/>
+  </actuator>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/reacher.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/reacher.xml
@ -0,0 +1,39 @@
+<mujoco model="reacher">
+	<compiler angle="radian" inertiafromgeom="true"/>
+	<default>
+		<joint armature="1" damping="1" limited="true"/>
+		<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
+	</default>
+	<option gravity="0 0 -9.81" integrator="RK4" timestep="0.01"/>
+	<worldbody>
+		<!-- Arena -->
+		<geom conaffinity="0" contype="0" name="ground" pos="0 0 0" rgba="0.9 0.9 0.9 1" size="1 1 10" type="plane"/>
+		<geom conaffinity="0" fromto="-.3 -.3 .01 .3 -.3 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
+		<geom conaffinity="0" fromto=" .3 -.3 .01 .3  .3 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
+		<geom conaffinity="0" fromto="-.3  .3 .01 .3  .3 .01" name="sideN" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
+		<geom conaffinity="0" fromto="-.3 -.3 .01 -.3 .3 .01" name="sideW" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
+		<!-- Arm -->
+		<geom conaffinity="0" contype="0" fromto="0 0 0 0 0 0.02" name="root" rgba="0.9 0.4 0.6 1" size=".011" type="cylinder"/>
+		<body name="body0" pos="0 0 .01">
+			<geom fromto="0 0 0 0.1 0 0" name="link0" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
+			<joint axis="0 0 1" limited="false" name="joint0" pos="0 0 0" type="hinge"/>
+			<body name="body1" pos="0.1 0 0">
+				<joint axis="0 0 1" limited="true" name="joint1" pos="0 0 0" range="-3.0 3.0" type="hinge"/>
+				<geom fromto="0 0 0 0.1 0 0" name="link1" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
+				<body name="fingertip" pos="0.11 0 0">
+					<geom contype="0" name="fingertip" pos="0 0 0" rgba="0.0 0.8 0.6 1" size=".01" type="sphere"/>
+				</body>
+			</body>
+		</body>
+		<!-- Target -->
+		<body name="target" pos=".1 -.1 .01">
+			<joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>
+			<joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>
+			<geom conaffinity="0" contype="0" name="target" pos="0 0 0" rgba="0.9 0.2 0.2 1" size=".009" type="sphere"/>
+		</body>
+	</worldbody>
+	<actuator>
+		<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint0"/>
+		<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint1"/>
+	</actuator>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/swimmer.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/swimmer.xml
@ -0,0 +1,39 @@
+<mujoco model="swimmer">
+  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
+  <option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1"/>
+  <default>
+    <geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1"/>
+    <joint armature='0.1'  />
+  </default>
+  <asset>
+    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
+    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
+    <material name="geom" texture="texgeom" texuniform="true"/>
+  </asset>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane"/>
+    <!--  ================= SWIMMER ================= /-->
+    <body name="torso" pos="0 0 0">
+      <camera name="track" mode="trackcom" pos="0 -3 3" xyaxes="1 0 0 0 1 1"/>
+      <geom density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule"/>
+      <joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide"/>
+      <joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide"/>
+      <joint axis="0 0 1" name="free_body_rot" pos="0 0 0" type="hinge"/>
+      <body name="mid" pos="0.5 0 0">
+        <geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
+        <joint axis="0 0 1" limited="true" name="motor1_rot" pos="0 0 0" range="-100 100" type="hinge"/>
+        <body name="back" pos="-1 0 0">
+          <geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
+          <joint axis="0 0 1" limited="true" name="motor2_rot" pos="0 0 0" range="-100 100" type="hinge"/>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <actuator>
+    <motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="motor1_rot"/>
+    <motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="motor2_rot"/>
+  </actuator>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/walker2d.xml
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/assets/walker2d.xml
@ -0,0 +1,68 @@
+<!--
+Walker2D model for `Walker2d-v5`, based on openai/gym/Walker2d
+modified by @kallinteris-Andreas
+  - To not require `coordinate="global"`
+  - keep feet friction to 0.9/1.9
+-->
+<mujoco model="walker2d">
+  <compiler angle="degree" inertiafromgeom="true"/>
+  <default>
+    <joint armature="0.01" damping=".1" limited="true"/>
+    <geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
+  </default>
+  <option integrator="RK4" timestep="0.002"/>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
+    <body name="torso" pos="0 0 1.25">
+      <camera name="track" mode="trackcom" pos="0 -3 -0.25" xyaxes="1 0 0 0 0 1"/>
+      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 -1.25" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 -1.25" ref="1.25" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
+      <geom friction="0.9" name="torso_geom" size="0.050000000000000003 0.19999999999999996" type="capsule"/>
+      <body name="thigh" pos="0 0 -0.19999999999999996">
+        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 0" range="-150 0" type="hinge"/>
+        <geom friction="0.9" pos="0 0 -0.22500000000000009" name="thigh_geom" size="0.050000000000000003 0.22500000000000003" type="capsule"/>
+        <body name="leg" pos="0 0 -0.70000000000000007">
+          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.25" range="-150 0" type="hinge"/>
+          <geom friction="0.9" name="leg_geom" size="0.040000000000000001 0.25" type="capsule"/>
+          <body name="foot" pos="0.20000000000000001 0 -0.34999999999999998">
+            <joint axis="0 -1 0" name="foot_joint" pos="-0.20000000000000001 0 0.10000000000000001" range="-45 45" type="hinge"/>
+            <geom friction="0.9" pos="-0.10000000000000001 0 0.10000000000000001" quat="0.70710678118654757 0 -0.70710678118654746 0" name="foot_geom" size="0.059999999999999998 0.10000000000000001" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <!-- copied and then replaced thigh->thigh_left, leg->leg_left, foot->foot_left -->
+      <body name="thigh_left" pos="0 0 -0.19999999999999996">
+        <joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 0" range="-150 0" type="hinge"/>
+        <geom friction="0.9" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.050000000000000003 0.22500000000000003" pos="0 0 -0.22500000000000009" type="capsule"/>
+        <body name="leg_left" pos="0 0 -0.70000000000000007">
+          <joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.25" range="-150 0" type="hinge"/>
+          <geom friction="0.9" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.040000000000000001 0.25" type="capsule"/>
+          <body name="foot_left" pos="0.20000000000000001 0 -0.34999999999999998">
+            <joint axis="0 -1 0" name="foot_left_joint" pos="-0.20000000000000001 0 0.10000000000000001" range="-45 45" type="hinge"/>
+            <geom friction="1.9" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.059999999999999998 0.10000000000000001" pos="-0.10000000000000001 0 0.10000000000000001" type="capsule" quat="0.70710678118654757 0 -0.70710678118654746 0"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <actuator>
+    <!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
+    <!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
+  </actuator>
+    <asset>
+        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
+            width="100" height="100"/>
+        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+        <material name="geom" texture="texgeom" texuniform="true"/>
+    </asset>
+</mujoco>
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/half_cheetah.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/half_cheetah.py
@ -0,0 +1,64 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
+    """Half-cheetah environment built on ``MuJocoPyEnv`` from ``half_cheetah.xml``.
+
+    Each step is rewarded with the change in the first generalized position
+    divided by ``self.dt``, minus a quadratic penalty on the action. The
+    environment never reports termination itself.
+    """
+
+    # Render modes and render frame rate declared for this environment.
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 20,
+    }
+
+    def __init__(self, **kwargs):
+        """Create the environment.
+
+        Args:
+            **kwargs: Forwarded unchanged to both ``MuJocoPyEnv.__init__`` and
+                ``utils.EzPickle.__init__``.
+        """
+        # 17 unbounded float64 entries; matches what ``_get_obs`` returns for
+        # this model (all of qpos except the first entry, plus all of qvel).
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64)
+        # NOTE(review): the positional ``5`` is presumably the frame skip (it is
+        # later read back as ``self.frame_skip`` in ``step``) -- confirm against
+        # the ``MuJocoPyEnv.__init__`` signature.
+        MuJocoPyEnv.__init__(
+            self, "half_cheetah.xml", 5, observation_space=observation_space, **kwargs
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def step(self, action):
+        """Advance the simulation with ``action`` and compute the reward.
+
+        Args:
+            action: Control input handed to ``do_simulation``; its squared
+                entries are summed for the control penalty.
+
+        Returns:
+            A 5-tuple ``(ob, reward, terminated, False, info)`` where ``ob``
+            comes from ``_get_obs``, ``terminated`` is always ``False``, the
+            fourth element is the constant ``False`` (presumably the
+            "truncated" flag of the Gymnasium step API), and ``info`` holds the
+            two reward terms under ``reward_run`` and ``reward_ctrl``.
+        """
+        # Sample the first generalized position before and after simulating so
+        # the reward reflects the displacement achieved during this step.
+        xposbefore = self.sim.data.qpos[0]
+        self.do_simulation(action, self.frame_skip)
+        xposafter = self.sim.data.qpos[0]
+
+        ob = self._get_obs()
+        # Quadratic action penalty with a fixed weight of 0.1.
+        reward_ctrl = -0.1 * np.square(action).sum()
+        # Displacement divided by the step duration, i.e. an average velocity.
+        reward_run = (xposafter - xposbefore) / self.dt
+        reward = reward_ctrl + reward_run
+        terminated = False
+
+        if self.render_mode == "human":
+            self.render()
+        return (
+            ob,
+            reward,
+            terminated,
+            False,
+            dict(reward_run=reward_run, reward_ctrl=reward_ctrl),
+        )
+
+    def _get_obs(self):
+        """Return the observation: ``qpos`` without its first entry, then ``qvel``."""
+        return np.concatenate(
+            [
+                # Skip index 0, the same coordinate ``step`` uses for the reward.
+                self.sim.data.qpos.flat[1:],
+                self.sim.data.qvel.flat,
+            ]
+        )
+
+    def reset_model(self):
+        """Reset to a randomly perturbed initial state and return the observation.
+
+        Positions get uniform noise in ``[-0.1, 0.1]`` added to ``init_qpos``;
+        velocities get standard-normal noise scaled by 0.1 added to
+        ``init_qvel``. Both draws use ``self.np_random``.
+        """
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=-0.1, high=0.1, size=self.model.nq
+        )
+        qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        """Set the viewer camera distance to half of the model's ``stat.extent``."""
+        assert self.viewer is not None
+        self.viewer.cam.distance = self.model.stat.extent * 0.5
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/half_cheetah_v3.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/half_cheetah_v3.py
@ -0,0 +1,128 @@
+__credits__ = ["Rushiv Arora"]
+
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+# Viewer camera settings, keyed by camera attribute name.
+# `HalfCheetahEnv.viewer_setup` copies each entry onto `self.viewer.cam`.
+DEFAULT_CAMERA_CONFIG = {
+    "distance": 4.0,
+}
+
+
+class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
+    """Configurable half-cheetah environment built on `MuJocoPyEnv`.
+
+    Reward weights, reset noise and whether `qpos[0]` is part of the
+    observation are all constructor arguments.
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array", "depth_array"],
+        "render_fps": 20,
+    }
+
+    def __init__(
+        self,
+        xml_file="half_cheetah.xml",
+        forward_reward_weight=1.0,
+        ctrl_cost_weight=0.1,
+        reset_noise_scale=0.1,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        """Record the settings and initialise the MuJoCo model.
+
+        Args:
+            xml_file: Model file handed to `MuJocoPyEnv.__init__`.
+            forward_reward_weight: Multiplier applied to the x velocity.
+            ctrl_cost_weight: Multiplier applied to `sum(action ** 2)`.
+            reset_noise_scale: Scale of the noise added in `reset_model`.
+            exclude_current_positions_from_observation: When truthy,
+                `qpos[0]` is dropped and the observation space has 17
+                entries; otherwise it has 18.
+            **kwargs: Forwarded to `EzPickle` and `MuJocoPyEnv`.
+        """
+        utils.EzPickle.__init__(
+            self,
+            xml_file,
+            forward_reward_weight,
+            ctrl_cost_weight,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        self._forward_reward_weight = forward_reward_weight
+        self._ctrl_cost_weight = ctrl_cost_weight
+        self._reset_noise_scale = reset_noise_scale
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        # One fewer entry when the leading position is left out.
+        obs_dim = 17 if exclude_current_positions_from_observation else 18
+        obs_space = Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float64)
+
+        MuJocoPyEnv.__init__(self, xml_file, 5, observation_space=obs_space, **kwargs)
+
+    def control_cost(self, action):
+        """Return `ctrl_cost_weight * sum(action ** 2)`."""
+        return self._ctrl_cost_weight * np.sum(np.square(action))
+
+    def step(self, action):
+        """Apply `action` and return `(obs, reward, terminated, truncated, info)`.
+
+        The reward is the weighted x velocity minus the control cost.
+        `terminated` and `truncated` are always `False` here.
+        """
+        x_start = self.sim.data.qpos[0]
+        self.do_simulation(action, self.frame_skip)
+        x_end = self.sim.data.qpos[0]
+        x_velocity = (x_end - x_start) / self.dt
+
+        cost = self.control_cost(action)
+        forward_reward = self._forward_reward_weight * x_velocity
+
+        obs = self._get_obs()
+        info = {
+            "x_position": x_end,
+            "x_velocity": x_velocity,
+            "reward_run": forward_reward,
+            "reward_ctrl": -cost,
+        }
+
+        if self.render_mode == "human":
+            self.render()
+        return obs, forward_reward - cost, False, False, info
+
+    def _get_obs(self):
+        """Return copies of `qpos` and `qvel` joined into one flat array.
+
+        The first `qpos` entry is dropped when positions are excluded.
+        """
+        qpos = self.sim.data.qpos.flat.copy()
+        qvel = self.sim.data.qvel.flat.copy()
+
+        if self._exclude_current_positions_from_observation:
+            qpos = qpos[1:]
+
+        return np.concatenate((qpos, qvel)).ravel()
+
+    def reset_model(self):
+        """Randomise the initial state and return the resulting observation.
+
+        Positions get uniform noise in [-scale, scale]; velocities get
+        standard normal noise multiplied by the same scale.
+        """
+        scale = self._reset_noise_scale
+
+        pos_noise = self.np_random.uniform(low=-scale, high=scale, size=self.model.nq)
+        start_qpos = self.init_qpos + pos_noise
+        start_qvel = self.init_qvel + self._reset_noise_scale * (
+            self.np_random.standard_normal(self.model.nv)
+        )
+
+        self.set_state(start_qpos, start_qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        """Copy every `DEFAULT_CAMERA_CONFIG` entry onto the viewer camera."""
+        assert self.viewer is not None
+        for attr_name, setting in DEFAULT_CAMERA_CONFIG.items():
+            if not isinstance(setting, np.ndarray):
+                setattr(self.viewer.cam, attr_name, setting)
+                continue
+            # Arrays are written element-wise into the existing attribute.
+            getattr(self.viewer.cam, attr_name)[:] = setting
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/half_cheetah_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/half_cheetah_v4.py
@ -0,0 +1,244 @@
+__credits__ = ["Rushiv Arora"]
+
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+# Camera settings passed to `MujocoEnv.__init__` as `default_camera_config`
+# by `HalfCheetahEnv.__init__`.
+DEFAULT_CAMERA_CONFIG = {
+    "distance": 4.0,
+}
+
+
+class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+
+    This environment is based on the work by P. Wawrzyński in
+    ["A Cat-Like Robot Real-Time Learning to Run"](http://staff.elka.pw.edu.pl/~pwawrzyn/pub-s/0812_LSCLRR.pdf).
+    The HalfCheetah is a 2-dimensional robot consisting of 9 body parts and 8
+    joints connecting them (including two paws). The goal is to apply a torque
+    on the joints to make the cheetah run forward (right) as fast as possible,
+    with a positive reward allocated based on the distance moved forward and a
+    negative reward allocated for moving backward. The torso and head of the
+    cheetah are fixed, and the torque can only be applied on the other 6 joints
+    over the front and back thighs (connecting to the torso), shins
+    (connecting to the thighs) and feet (connecting to the shins).
+
+    ## Action Space
+    The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.
+
+    | Num | Action                                  | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit         |
+    | --- | --------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
+    | 0   | Torque applied on the back thigh rotor  | -1          | 1           | bthigh                           | hinge | torque (N m) |
+    | 1   | Torque applied on the back shin rotor   | -1          | 1           | bshin                            | hinge | torque (N m) |
+    | 2   | Torque applied on the back foot rotor   | -1          | 1           | bfoot                            | hinge | torque (N m) |
+    | 3   | Torque applied on the front thigh rotor | -1          | 1           | fthigh                           | hinge | torque (N m) |
+    | 4   | Torque applied on the front shin rotor  | -1          | 1           | fshin                            | hinge | torque (N m) |
+    | 5   | Torque applied on the front foot rotor  | -1          | 1           | ffoot                            | hinge | torque (N m) |
+
+
+    ## Observation Space
+    Observations consist of positional values of different body parts of the
+    cheetah, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities.
+
+    By default, observations do not include the cheetah's `rootx`. It may
+    be included by passing `exclude_current_positions_from_observation=False` during construction.
+    In that case, the observation space will be a `Box(-Inf, Inf, (18,), float64)` where the first element
+    represents the `rootx`.
+    Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the
+    x-coordinate will be returned in `info` with key `"x_position"`.
+
+    However, by default, the observation is a `Box(-Inf, Inf, (17,), float64)` where the elements correspond to the following:
+
+    | Num | Observation                          | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
+    | --- | ------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
+    | 0   | z-coordinate of the front tip        | -Inf | Inf | rootz                            | slide | position (m)             |
+    | 1   | angle of the front tip               | -Inf | Inf | rooty                            | hinge | angle (rad)              |
+    | 2   | angle of the back thigh              | -Inf | Inf | bthigh                           | hinge | angle (rad)              |
+    | 3   | angle of the back shin               | -Inf | Inf | bshin                            | hinge | angle (rad)              |
+    | 4   | angle of the back foot               | -Inf | Inf | bfoot                            | hinge | angle (rad)              |
+    | 5   | angle of the front thigh             | -Inf | Inf | fthigh                           | hinge | angle (rad)              |
+    | 6   | angle of the front shin              | -Inf | Inf | fshin                            | hinge | angle (rad)              |
+    | 7   | angle of the front foot              | -Inf | Inf | ffoot                            | hinge | angle (rad)              |
+    | 8   | x-velocity of the front tip          | -Inf | Inf | rootx                            | slide | velocity (m/s)           |
+    | 9   | z-velocity of the front tip          | -Inf | Inf | rootz                            | slide | velocity (m/s)           |
+    | 10  | angular velocity of the front tip    | -Inf | Inf | rooty                            | hinge | angular velocity (rad/s) |
+    | 11  | angular velocity of the back thigh   | -Inf | Inf | bthigh                           | hinge | angular velocity (rad/s) |
+    | 12  | angular velocity of the back shin    | -Inf | Inf | bshin                            | hinge | angular velocity (rad/s) |
+    | 13  | angular velocity of the back foot    | -Inf | Inf | bfoot                            | hinge | angular velocity (rad/s) |
+    | 14  | angular velocity of the front thigh  | -Inf | Inf | fthigh                           | hinge | angular velocity (rad/s) |
+    | 15  | angular velocity of the front shin   | -Inf | Inf | fshin                            | hinge | angular velocity (rad/s) |
+    | 16  | angular velocity of the front foot   | -Inf | Inf | ffoot                            | hinge | angular velocity (rad/s) |
+    | excluded |  x-coordinate of the front tip  | -Inf | Inf | rootx                            | slide | position (m)             |
+
+    ## Rewards
+    The reward consists of two parts:
+    - *forward_reward*: A reward of moving forward which is measured
+    as *`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*. *dt* is
+    the time between actions and is dependent on the frame_skip parameter
+    (fixed to 5), where the frametime is 0.01 - making the
+    default *dt = 5 * 0.01 = 0.05*. This reward would be positive if the cheetah
+    runs forward (right).
+    - *ctrl_cost*: A cost for penalising the cheetah if it takes
+    actions that are too large. It is measured as *`ctrl_cost_weight` *
+    sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
+    control and has a default value of 0.1
+
+    The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
+    under the keys `"reward_run"` (*forward_reward*) and `"reward_ctrl"` (*-ctrl_cost*).
+
+    ## Starting State
+    All observations start in state (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,) with a noise added to the
+    initial state for stochasticity. As seen before, the first 8 values in the
+    state are positional and the last 9 values are velocity. A uniform noise in
+    the range of [-`reset_noise_scale`, `reset_noise_scale`] is added to the positional values while a standard
+    normal noise with a mean of 0 and standard deviation of `reset_noise_scale` is added to the
+    initial velocity values of all zeros.
+
+    ## Episode End
+    The episode truncates when the episode length is greater than 1000.
+
+    ## Arguments
+
+    No additional arguments are currently supported in v2 and lower.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('HalfCheetah-v2')
+    ```
+
+    v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('HalfCheetah-v4', ctrl_cost_weight=0.1, ....)
+    ```
+
+    | Parameter                                    | Type      | Default              | Description                                                                                                                                                       |
+    | -------------------------------------------- | --------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+    | `xml_file`                                   | **str**   | `"half_cheetah.xml"` | Path to a MuJoCo model                                                                                                                                            |
+    | `forward_reward_weight`                      | **float** | `1.0`                | Weight for _forward_reward_ term (see section on reward)                                                                                                          |
+    | `ctrl_cost_weight`                           | **float** | `0.1`                | Weight for _ctrl_cost_ weight (see section on reward)                                                                                                             |
+    | `reset_noise_scale`                          | **float** | `0.1`                | Scale of random perturbations of initial position and velocity (see section on Starting State)                                                                    |
+    | `exclude_current_positions_from_observation` | **bool**  | `True`               | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+    Note: the constructor of this class does not declare an `xml_file` parameter; it always passes
+    `"half_cheetah.xml"` to `MujocoEnv` as the model path.
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+    * v0: Initial versions release (1.0.0)
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 20,
+    }
+
+    def __init__(
+        self,
+        forward_reward_weight=1.0,
+        ctrl_cost_weight=0.1,
+        reset_noise_scale=0.1,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        """Store the reward/noise settings and initialise `MujocoEnv`.
+
+        The observation space has 17 entries when
+        `exclude_current_positions_from_observation` is truthy and 18
+        otherwise. `**kwargs` is forwarded to both `EzPickle` and
+        `MujocoEnv`.
+        """
+        utils.EzPickle.__init__(
+            self,
+            forward_reward_weight,
+            ctrl_cost_weight,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        self._forward_reward_weight = forward_reward_weight
+
+        self._ctrl_cost_weight = ctrl_cost_weight
+
+        self._reset_noise_scale = reset_noise_scale
+
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        if exclude_current_positions_from_observation:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
+            )
+        else:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
+            )
+
+        # Model path and frame skip (5) are fixed for this version.
+        MujocoEnv.__init__(
+            self,
+            "half_cheetah.xml",
+            5,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+
+    def control_cost(self, action):
+        """Return `ctrl_cost_weight * sum(action ** 2)`."""
+        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
+        return control_cost
+
+    def step(self, action):
+        """Apply `action` and return `(observation, reward, terminated, truncated, info)`.
+
+        The reward is the weighted x velocity (change of `qpos[0]` divided
+        by `self.dt`) minus the control cost. `terminated` and the
+        truncation flag are always `False` here.
+        """
+        x_position_before = self.data.qpos[0]
+        self.do_simulation(action, self.frame_skip)
+        x_position_after = self.data.qpos[0]
+        x_velocity = (x_position_after - x_position_before) / self.dt
+
+        ctrl_cost = self.control_cost(action)
+
+        forward_reward = self._forward_reward_weight * x_velocity
+
+        observation = self._get_obs()
+        reward = forward_reward - ctrl_cost
+        terminated = False
+        info = {
+            "x_position": x_position_after,
+            "x_velocity": x_velocity,
+            "reward_run": forward_reward,
+            "reward_ctrl": -ctrl_cost,
+        }
+
+        if self.render_mode == "human":
+            self.render()
+        return observation, reward, terminated, False, info
+
+    def _get_obs(self):
+        """Return copies of `qpos` and `qvel` joined into one flat array.
+
+        The first `qpos` entry is dropped when positions are excluded.
+        """
+        position = self.data.qpos.flat.copy()
+        velocity = self.data.qvel.flat.copy()
+
+        if self._exclude_current_positions_from_observation:
+            position = position[1:]
+
+        observation = np.concatenate((position, velocity)).ravel()
+        return observation
+
+    def reset_model(self):
+        """Randomise the initial state and return the resulting observation.
+
+        Positions get uniform noise in [-scale, scale]; velocities get
+        standard normal noise multiplied by the same scale.
+        """
+        noise_low = -self._reset_noise_scale
+        noise_high = self._reset_noise_scale
+
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nq
+        )
+        qvel = (
+            self.init_qvel
+            + self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
+        )
+
+        self.set_state(qpos, qvel)
+
+        observation = self._get_obs()
+        return observation
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/hopper.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/hopper.py
@ -0,0 +1,67 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class HopperEnv(MuJocoPyEnv, utils.EzPickle):
+    """Hopper environment built on `MuJocoPyEnv`.
+
+    Loads `hopper.xml` with a frame skip of 4 and declares an 11-element
+    float64 observation space. Observations are `qpos[1:]` followed by
+    `qvel` clipped to [-10, 10].
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array", "depth_array"],
+        "render_fps": 125,
+    }
+
+    def __init__(self, **kwargs):
+        """Build the observation space, then initialise both base classes."""
+        obs_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
+        MuJocoPyEnv.__init__(
+            self,
+            "hopper.xml",
+            4,
+            observation_space=obs_space,
+            **kwargs,
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def step(self, a):
+        """Apply action `a` and return `(obs, reward, terminated, truncated, info)`.
+
+        The reward is the change of `qpos[0]` divided by `self.dt`, plus an
+        alive bonus of 1.0, minus `1e-3 * sum(a ** 2)`. The episode
+        terminates when the state vector is non-finite, any entry from
+        index 2 on reaches magnitude 100, the height is not above 0.7, or
+        the angle magnitude is not below 0.2.
+        """
+        x_start = self.sim.data.qpos[0]
+        self.do_simulation(a, self.frame_skip)
+        x_end, height, ang = self.sim.data.qpos[0:3]
+
+        alive_bonus = 1.0
+        reward = (x_end - x_start) / self.dt + alive_bonus - 1e-3 * np.square(a).sum()
+
+        state = self.state_vector()
+        healthy = (
+            np.isfinite(state).all()
+            and (np.abs(state[2:]) < 100).all()
+            and (height > 0.7)
+            and (abs(ang) < 0.2)
+        )
+        terminated = not healthy
+        obs = self._get_obs()
+
+        if self.render_mode == "human":
+            self.render()
+        return obs, reward, terminated, False, {}
+
+    def _get_obs(self):
+        """Return `qpos[1:]` followed by `qvel` clipped to [-10, 10]."""
+        positions = self.sim.data.qpos.flat[1:]
+        clipped_velocities = np.clip(self.sim.data.qvel.flat, -10, 10)
+        return np.concatenate([positions, clipped_velocities])
+
+    def reset_model(self):
+        """Add uniform noise in [-0.005, 0.005] to the initial state.
+
+        Returns the observation of the perturbed state.
+        """
+        pos_noise = self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nq)
+        start_qpos = self.init_qpos + pos_noise
+        vel_noise = self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv)
+        start_qvel = self.init_qvel + vel_noise
+        self.set_state(start_qpos, start_qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        """Configure the viewer camera: tracked body, distance, look-at, elevation."""
+        assert self.viewer is not None
+        self.viewer.cam.trackbodyid = 2
+        self.viewer.cam.distance = self.model.stat.extent * 0.75
+        self.viewer.cam.lookat[2] = 1.15
+        self.viewer.cam.elevation = -20
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/hopper_v3.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/hopper_v3.py
@ -0,0 +1,178 @@
+__credits__ = ["Rushiv Arora"]
+
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+# Viewer camera settings, keyed by camera attribute name.
+# `HopperEnv.viewer_setup` copies each entry onto `self.viewer.cam`: ndarray
+# values are written element-wise (`[:] =`), everything else via `setattr`.
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": 2,
+    "distance": 3.0,
+    "lookat": np.array((0.0, 0.0, 1.15)),
+    "elevation": -20.0,
+}
+
+
+class HopperEnv(MuJocoPyEnv, utils.EzPickle):
+    """Configurable hopper environment built on `MuJocoPyEnv`.
+
+    Reward weights, the "healthy" ranges used for termination, reset noise
+    and whether `qpos[0]` is part of the observation are all constructor
+    arguments.
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array", "depth_array"],
+        "render_fps": 125,
+    }
+
+    def __init__(
+        self,
+        xml_file="hopper.xml",
+        forward_reward_weight=1.0,
+        ctrl_cost_weight=1e-3,
+        healthy_reward=1.0,
+        terminate_when_unhealthy=True,
+        healthy_state_range=(-100.0, 100.0),
+        healthy_z_range=(0.7, float("inf")),
+        healthy_angle_range=(-0.2, 0.2),
+        reset_noise_scale=5e-3,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        """Record the settings and initialise the MuJoCo model.
+
+        Args:
+            xml_file: Model file handed to `MuJocoPyEnv.__init__`.
+            forward_reward_weight: Multiplier applied to the x velocity.
+            ctrl_cost_weight: Multiplier applied to `sum(action ** 2)`.
+            healthy_reward: Bonus granted by the `healthy_reward` property.
+            terminate_when_unhealthy: Whether leaving the healthy ranges
+                terminates the episode.
+            healthy_state_range: Open `(min, max)` interval for
+                `state_vector()[2:]`.
+            healthy_z_range: Open `(min, max)` interval for `qpos[1]`.
+            healthy_angle_range: Open `(min, max)` interval for `qpos[2]`.
+            reset_noise_scale: Half-width of the uniform reset noise.
+            exclude_current_positions_from_observation: When truthy,
+                `qpos[0]` is dropped and the observation space has 11
+                entries; otherwise it has 12.
+            **kwargs: Forwarded to `EzPickle` and `MuJocoPyEnv`.
+        """
+        utils.EzPickle.__init__(
+            self,
+            xml_file,
+            forward_reward_weight,
+            ctrl_cost_weight,
+            healthy_reward,
+            terminate_when_unhealthy,
+            healthy_state_range,
+            healthy_z_range,
+            healthy_angle_range,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        self._forward_reward_weight = forward_reward_weight
+        self._ctrl_cost_weight = ctrl_cost_weight
+
+        self._healthy_reward = healthy_reward
+        self._terminate_when_unhealthy = terminate_when_unhealthy
+        self._healthy_state_range = healthy_state_range
+        self._healthy_z_range = healthy_z_range
+        self._healthy_angle_range = healthy_angle_range
+
+        self._reset_noise_scale = reset_noise_scale
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        # One fewer entry when the leading position is left out.
+        obs_dim = 11 if exclude_current_positions_from_observation else 12
+        obs_space = Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float64)
+
+        MuJocoPyEnv.__init__(self, xml_file, 4, observation_space=obs_space, **kwargs)
+
+    @property
+    def healthy_reward(self):
+        """Return the healthy bonus, or zero.
+
+        The bonus is granted when the hopper is healthy or when
+        `terminate_when_unhealthy` is truthy.
+        """
+        granted = self.is_healthy or self._terminate_when_unhealthy
+        return float(granted) * self._healthy_reward
+
+    def control_cost(self, action):
+        """Return `ctrl_cost_weight * sum(action ** 2)`."""
+        return self._ctrl_cost_weight * np.sum(np.square(action))
+
+    @property
+    def is_healthy(self):
+        """Return whether state, height and angle are strictly inside their ranges."""
+        z, angle = self.sim.data.qpos[1:3]
+        rest_of_state = self.state_vector()[2:]
+
+        state_lo, state_hi = self._healthy_state_range
+        z_lo, z_hi = self._healthy_z_range
+        angle_lo, angle_hi = self._healthy_angle_range
+
+        above_lo = state_lo < rest_of_state
+        below_hi = rest_of_state < state_hi
+        checks = (
+            np.all(np.logical_and(above_lo, below_hi)),
+            z_lo < z < z_hi,
+            angle_lo < angle < angle_hi,
+        )
+        return all(checks)
+
+    @property
+    def terminated(self):
+        """Return `not is_healthy` when termination is enabled, else `False`."""
+        if self._terminate_when_unhealthy:
+            return not self.is_healthy
+        return False
+
+    def _get_obs(self):
+        """Return `qpos` and clipped `qvel` joined into one flat array.
+
+        Velocities are clipped to [-10, 10]; the first `qpos` entry is
+        dropped when positions are excluded.
+        """
+        qpos = self.sim.data.qpos.flat.copy()
+        qvel = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
+
+        if self._exclude_current_positions_from_observation:
+            qpos = qpos[1:]
+
+        return np.concatenate((qpos, qvel)).ravel()
+
+    def step(self, action):
+        """Apply `action` and return `(obs, reward, terminated, truncated, info)`.
+
+        The reward is the weighted x velocity plus the healthy bonus minus
+        the control cost. The truncation flag is always `False` here.
+        """
+        x_start = self.sim.data.qpos[0]
+        self.do_simulation(action, self.frame_skip)
+        x_end = self.sim.data.qpos[0]
+        x_velocity = (x_end - x_start) / self.dt
+
+        cost = self.control_cost(action)
+        forward_reward = self._forward_reward_weight * x_velocity
+        bonus = self.healthy_reward
+        gains = forward_reward + bonus
+
+        obs = self._get_obs()
+        reward = gains - cost
+        done = self.terminated
+        info = {"x_position": x_end, "x_velocity": x_velocity}
+
+        if self.render_mode == "human":
+            self.render()
+        return obs, reward, done, False, info
+
+    def reset_model(self):
+        """Add uniform noise in [-scale, scale] to the initial state.
+
+        Returns the observation of the perturbed state.
+        """
+        lo = -self._reset_noise_scale
+        hi = self._reset_noise_scale
+
+        pos_noise = self.np_random.uniform(low=lo, high=hi, size=self.model.nq)
+        start_qpos = self.init_qpos + pos_noise
+        vel_noise = self.np_random.uniform(low=lo, high=hi, size=self.model.nv)
+        start_qvel = self.init_qvel + vel_noise
+
+        self.set_state(start_qpos, start_qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        """Copy every `DEFAULT_CAMERA_CONFIG` entry onto the viewer camera."""
+        assert self.viewer is not None
+        for attr_name, setting in DEFAULT_CAMERA_CONFIG.items():
+            if not isinstance(setting, np.ndarray):
+                setattr(self.viewer.cam, attr_name, setting)
+                continue
+            # Arrays are written element-wise into the existing attribute.
+            getattr(self.viewer.cam, attr_name)[:] = setting
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/hopper_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/hopper_v4.py
@ -0,0 +1,298 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+# Camera settings handed to ``MujocoEnv.__init__`` through its
+# ``default_camera_config`` argument by ``HopperEnv`` below.
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": 2,
+    "distance": 3.0,
+    "lookat": np.array((0.0, 0.0, 1.15)),
+    "elevation": -20.0,
+}
+
+
+class HopperEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+
+    This environment is based on the work done by Erez, Tassa, and Todorov in
+    ["Infinite Horizon Model Predictive Control for Nonlinear Periodic Tasks"](http://www.roboticsproceedings.org/rss07/p10.pdf). The environment aims to
+    increase the number of independent state and control variables as compared to
+    the classic control environments. The hopper is a two-dimensional
+    one-legged figure that consists of four main body parts - the torso at the
+    top, the thigh in the middle, the leg in the bottom, and a single foot on
+    which the entire body rests. The goal is to make hops that move in the
+    forward (right) direction by applying torques on the three hinges
+    connecting the four body parts.
+
+    ## Action Space
+    The action space is a `Box(-1, 1, (3,), float32)`. An action represents the torques applied at the hinge joints.
+
+    | Num | Action                             | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit         |
+    |-----|------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
+    | 0   | Torque applied on the thigh rotor  | -1          | 1           | thigh_joint                      | hinge | torque (N m) |
+    | 1   | Torque applied on the leg rotor    | -1          | 1           | leg_joint                        | hinge | torque (N m) |
+    | 2   | Torque applied on the foot rotor   | -1          | 1           | foot_joint                       | hinge | torque (N m) |
+
+    ## Observation Space
+    Observations consist of positional values of different body parts of the
+    hopper, followed by the velocities of those individual parts
+    (their derivatives) with all the positions ordered before all the velocities.
+
+    By default, observations do not include the x-coordinate of the hopper. It may
+    be included by passing `exclude_current_positions_from_observation=False` during construction.
+    In that case, the observation space will be `Box(-Inf, Inf, (12,), float64)` where the first observation
+    represents the x-coordinate of the hopper.
+    Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
+    will be returned in `info` with key `"x_position"`.
+
+    However, by default, the observation is a `Box(-Inf, Inf, (11,), float64)` where the elements
+    correspond to the following:
+
+    | Num | Observation                                        | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
+    | --- | -------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
+    | 0   | z-coordinate of the torso (height of hopper)       | -Inf | Inf | rootz                            | slide | position (m)             |
+    | 1   | angle of the torso                                 | -Inf | Inf | rooty                            | hinge | angle (rad)              |
+    | 2   | angle of the thigh joint                           | -Inf | Inf | thigh_joint                      | hinge | angle (rad)              |
+    | 3   | angle of the leg joint                             | -Inf | Inf | leg_joint                        | hinge | angle (rad)              |
+    | 4   | angle of the foot joint                            | -Inf | Inf | foot_joint                       | hinge | angle (rad)              |
+    | 5   | velocity of the x-coordinate of the torso          | -Inf | Inf | rootx                            | slide | velocity (m/s)           |
+    | 6   | velocity of the z-coordinate (height) of the torso | -Inf | Inf | rootz                            | slide | velocity (m/s)           |
+    | 7   | angular velocity of the angle of the torso         | -Inf | Inf | rooty                            | hinge | angular velocity (rad/s) |
+    | 8   | angular velocity of the thigh hinge                | -Inf | Inf | thigh_joint                      | hinge | angular velocity (rad/s) |
+    | 9   | angular velocity of the leg hinge                  | -Inf | Inf | leg_joint                        | hinge | angular velocity (rad/s) |
+    | 10  | angular velocity of the foot hinge                 | -Inf | Inf | foot_joint                       | hinge | angular velocity (rad/s) |
+    | excluded | x-coordinate of the torso                     | -Inf | Inf | rootx                            | slide | position (m)             |
+
+    Note that the velocity entries are clipped to `[-10, 10]` before they are placed in the observation.
+
+    ## Rewards
+    The reward consists of three parts:
+    - *healthy_reward*: Every timestep that the hopper is healthy (see definition in section "Episode End"), it gets a reward of fixed value `healthy_reward`.
+    - *forward_reward*: A reward of hopping forward which is measured
+    as *`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*. *dt* is
+    the time between actions and is dependent on the frame_skip parameter
+    (fixed to 4), where the frametime is 0.002 - making the
+    default *dt = 4 * 0.002 = 0.008*. This reward would be positive if the hopper
+    hops forward (positive x direction).
+    - *ctrl_cost*: A cost for penalising the hopper if it takes
+    actions that are too large. It is measured as *`ctrl_cost_weight` *
+    sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
+    control and has a default value of 0.001
+
+    The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost*. In this version `info` contains only `"x_position"` and `"x_velocity"`; the individual reward terms are not reported.
+
+    ## Starting State
+    All observations start in state
+    (0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise
+     in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity.
+
+    ## Episode End
+    The hopper is said to be unhealthy if any of the following happens:
+
+    1. An element of `observation[1:]` (if  `exclude_current_positions_from_observation=True`, else `observation[2:]`) is no longer strictly inside the interval specified by the argument `healthy_state_range`
+    2. The height of the hopper (`observation[0]` if  `exclude_current_positions_from_observation=True`, else `observation[1]`) is no longer strictly inside the interval specified by the argument `healthy_z_range` (usually meaning that it has fallen)
+    3. The angle (`observation[1]` if  `exclude_current_positions_from_observation=True`, else `observation[2]`) is no longer strictly inside the interval specified by the argument `healthy_angle_range`
+
+    If `terminate_when_unhealthy=True` is passed during construction (which is the default),
+    the episode ends when any of the following happens:
+
+    1. Truncation: The episode duration reaches 1000 timesteps
+    2. Termination: The hopper is unhealthy
+
+    If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
+
+    ## Arguments
+
+    No additional arguments are currently supported in v2 and lower.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Hopper-v2')
+    ```
+
+    v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+    Note that this v4 class hard-codes the model file `"hopper.xml"` and does not declare `xml_file`
+    as a named constructor argument; any such keyword is simply forwarded through `**kwargs`.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Hopper-v4', ctrl_cost_weight=0.1, ....)
+    ```
+
+    | Parameter                                    | Type      | Default               | Description                                                                                                                                                                     |
+    | -------------------------------------------- | --------- | --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+    | `xml_file`                                   | **str**   | `"hopper.xml"`        | Path to a MuJoCo model                                                                                                                                                          |
+    | `forward_reward_weight`                      | **float** | `1.0`                 | Weight for _forward_reward_ term (see section on reward)                                                                                                                        |
+    | `ctrl_cost_weight`                           | **float** | `0.001`               | Weight for _ctrl_cost_ reward (see section on reward)                                                                                                                           |
+    | `healthy_reward`                             | **float** | `1`                   | Constant reward given if the hopper is "healthy" after timestep                                                                                                                 |
+    | `terminate_when_unhealthy`                   | **bool**  | `True`                | If true, issue a done signal if the hopper is no longer healthy                                                                                                                 |
+    | `healthy_state_range`                        | **tuple** | `(-100, 100)`         | The elements of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) must be in this range for the hopper to be considered healthy  |
+    | `healthy_z_range`                            | **tuple** | `(0.7, float("inf"))` | The z-coordinate must be in this range for the hopper to be considered healthy                                                                                                  |
+    | `healthy_angle_range`                        | **tuple** | `(-0.2, 0.2)`         | The angle given by `observation[1]` (if `exclude_current_positions_from_observation=True`, else `observation[2]`) must be in this range for the hopper to be considered healthy |
+    | `reset_noise_scale`                          | **float** | `5e-3`                | Scale of random perturbations of initial position and velocity (see section on Starting State)                                                                                  |
+    | `exclude_current_positions_from_observation` | **bool**  | `True`                | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies               |
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+    * v0: Initial versions release (1.0.0)
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 125,
+    }
+
+    def __init__(
+        self,
+        forward_reward_weight=1.0,
+        ctrl_cost_weight=1e-3,
+        healthy_reward=1.0,
+        terminate_when_unhealthy=True,
+        healthy_state_range=(-100.0, 100.0),
+        healthy_z_range=(0.7, float("inf")),
+        healthy_angle_range=(-0.2, 0.2),
+        reset_noise_scale=5e-3,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        """Store the reward/termination settings and initialise the MuJoCo model.
+
+        The arguments are described in the "Arguments" table of the class
+        docstring. Remaining ``**kwargs`` are forwarded unchanged to both
+        ``EzPickle.__init__`` and ``MujocoEnv.__init__``.
+        """
+        # NOTE(review): presumably records the constructor arguments so the
+        # environment can be re-created when unpickled - confirm against
+        # gymnasium.utils.EzPickle.
+        utils.EzPickle.__init__(
+            self,
+            forward_reward_weight,
+            ctrl_cost_weight,
+            healthy_reward,
+            terminate_when_unhealthy,
+            healthy_state_range,
+            healthy_z_range,
+            healthy_angle_range,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        self._forward_reward_weight = forward_reward_weight
+
+        self._ctrl_cost_weight = ctrl_cost_weight
+
+        self._healthy_reward = healthy_reward
+        self._terminate_when_unhealthy = terminate_when_unhealthy
+
+        self._healthy_state_range = healthy_state_range
+        self._healthy_z_range = healthy_z_range
+        self._healthy_angle_range = healthy_angle_range
+
+        self._reset_noise_scale = reset_noise_scale
+
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        # The observation has one element fewer when the first position entry
+        # is dropped by _get_obs (11 vs. 12).
+        if exclude_current_positions_from_observation:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64
+            )
+        else:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64
+            )
+
+        # The model file and the value 4 (the frame skip, per the class
+        # docstring) are fixed here rather than taken from the arguments.
+        MujocoEnv.__init__(
+            self,
+            "hopper.xml",
+            4,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+
+    @property
+    def healthy_reward(self):
+        """Healthy bonus for the current state.
+
+        Equals ``_healthy_reward`` when the hopper is healthy or when
+        ``_terminate_when_unhealthy`` is set, and ``0.0`` otherwise.
+        """
+        return (
+            float(self.is_healthy or self._terminate_when_unhealthy)
+            * self._healthy_reward
+        )
+
+    def control_cost(self, action):
+        """Return ``_ctrl_cost_weight`` times the sum of squared ``action`` entries."""
+        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
+        return control_cost
+
+    @property
+    def is_healthy(self):
+        """Whether the current simulator state lies strictly inside all healthy ranges.
+
+        Checks ``qpos[1]`` against ``_healthy_z_range``, ``qpos[2]`` against
+        ``_healthy_angle_range`` and every element of ``state_vector()[2:]``
+        against ``_healthy_state_range``. All comparisons are strict, so a
+        value exactly on a bound counts as unhealthy.
+        """
+        z, angle = self.data.qpos[1:3]
+        state = self.state_vector()[2:]
+
+        min_state, max_state = self._healthy_state_range
+        min_z, max_z = self._healthy_z_range
+        min_angle, max_angle = self._healthy_angle_range
+
+        healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
+        healthy_z = min_z < z < max_z
+        healthy_angle = min_angle < angle < max_angle
+
+        is_healthy = all((healthy_state, healthy_z, healthy_angle))
+
+        return is_healthy
+
+    @property
+    def terminated(self):
+        """True only if unhealthy termination is enabled and the hopper is unhealthy."""
+        terminated = not self.is_healthy if self._terminate_when_unhealthy else False
+        return terminated
+
+    def _get_obs(self):
+        """Build the observation from positions followed by clipped velocities.
+
+        Velocities are clipped to ``[-10, 10]``. The first position entry is
+        dropped when ``_exclude_current_positions_from_observation`` is set.
+        """
+        position = self.data.qpos.flat.copy()
+        velocity = np.clip(self.data.qvel.flat.copy(), -10, 10)
+
+        if self._exclude_current_positions_from_observation:
+            position = position[1:]
+
+        observation = np.concatenate((position, velocity)).ravel()
+        return observation
+
+    def step(self, action):
+        """Apply ``action`` for ``frame_skip`` simulation steps.
+
+        Returns ``(observation, reward, terminated, False, info)`` where
+        ``reward = forward_reward + healthy_reward - ctrl_cost`` and ``info``
+        holds ``"x_position"`` and ``"x_velocity"``. The fourth element is
+        always ``False`` in this implementation.
+        """
+        # x velocity is the finite difference of qpos[0] across the step.
+        x_position_before = self.data.qpos[0]
+        self.do_simulation(action, self.frame_skip)
+        x_position_after = self.data.qpos[0]
+        x_velocity = (x_position_after - x_position_before) / self.dt
+
+        ctrl_cost = self.control_cost(action)
+
+        forward_reward = self._forward_reward_weight * x_velocity
+        healthy_reward = self.healthy_reward
+
+        rewards = forward_reward + healthy_reward
+        costs = ctrl_cost
+
+        observation = self._get_obs()
+        reward = rewards - costs
+        terminated = self.terminated
+        info = {
+            "x_position": x_position_after,
+            "x_velocity": x_velocity,
+        }
+
+        if self.render_mode == "human":
+            self.render()
+        return observation, reward, terminated, False, info
+
+    def reset_model(self):
+        """Reset to the initial state plus uniform noise and return the observation.
+
+        Noise for each position and velocity entry is drawn from
+        ``[-_reset_noise_scale, _reset_noise_scale]``.
+        """
+        noise_low = -self._reset_noise_scale
+        noise_high = self._reset_noise_scale
+
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nq
+        )
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nv
+        )
+
+        self.set_state(qpos, qvel)
+
+        observation = self._get_obs()
+        return observation
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoid.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoid.py
@ -0,0 +1,94 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+def mass_center(model, sim):
+    """Return the first component of the mass-weighted mean of ``sim.data.xipos``."""
+    body_masses = np.expand_dims(model.body_mass, 1)
+    weighted_positions = body_masses * sim.data.xipos
+    center = np.sum(weighted_positions, 0) / np.sum(body_masses)
+    return center[0]
+
+
+class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
+    """Humanoid environment on top of ``MuJocoPyEnv`` with fixed reward weights.
+
+    The reward combines a forward-progress term, a quadratic control penalty,
+    a capped quadratic contact-force penalty and a constant alive bonus. An
+    episode terminates when ``qpos[2]`` is below 1.0 or above 2.0.
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array", "depth_array"],
+        "render_fps": 67,
+    }
+
+    def __init__(self, **kwargs):
+        """Load ``humanoid.xml`` with the value 5 as second argument and a 376-element observation space."""
+        obs_space = Box(low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64)
+        MuJocoPyEnv.__init__(
+            self,
+            "humanoid.xml",
+            5,
+            observation_space=obs_space,
+            **kwargs,
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def _get_obs(self):
+        """Concatenate the simulator arrays that make up one observation."""
+        sim_data = self.sim.data
+        pieces = [
+            sim_data.qpos.flat[2:],  # the first two position entries are left out
+            sim_data.qvel.flat,
+            sim_data.cinert.flat,
+            sim_data.cvel.flat,
+            sim_data.qfrc_actuator.flat,
+            sim_data.cfrc_ext.flat,
+        ]
+        return np.concatenate(pieces)
+
+    def step(self, a):
+        """Apply action ``a`` and return ``(obs, reward, terminated, False, info)``."""
+        center_before = mass_center(self.model, self.sim)
+        self.do_simulation(a, self.frame_skip)
+        center_after = mass_center(self.model, self.sim)
+
+        sim_data = self.sim.data
+        alive_bonus = 5.0
+        lin_vel_cost = 1.25 * (center_after - center_before) / self.dt
+        quad_ctrl_cost = 0.1 * np.square(sim_data.ctrl).sum()
+        # The contact penalty is capped at 10.
+        quad_impact_cost = min(0.5e-6 * np.square(sim_data.cfrc_ext).sum(), 10)
+        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
+
+        z = self.sim.data.qpos[2]
+        terminated = bool((z < 1.0) or (z > 2.0))
+
+        if self.render_mode == "human":
+            self.render()
+
+        observation = self._get_obs()
+        info = {
+            "reward_linvel": lin_vel_cost,
+            "reward_quadctrl": -quad_ctrl_cost,
+            "reward_alive": alive_bonus,
+            "reward_impact": -quad_impact_cost,
+        }
+        return observation, reward, terminated, False, info
+
+    def reset_model(self):
+        """Reset to the initial state plus uniform noise in ``[-0.01, 0.01]``."""
+        c = 0.01
+        # Position noise is drawn before velocity noise; the order matters for
+        # the random stream.
+        qpos_noise = self.np_random.uniform(low=-c, high=c, size=self.model.nq)
+        qvel_noise = self.np_random.uniform(low=-c, high=c, size=self.model.nv)
+        self.set_state(self.init_qpos + qpos_noise, self.init_qvel + qvel_noise)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        """Point the viewer camera at body 1 from a distance of the model extent."""
+        assert self.viewer is not None
+        cam = self.viewer.cam
+        cam.trackbodyid = 1
+        cam.distance = self.model.stat.extent * 1.0
+        cam.lookat[2] = 2.0
+        cam.elevation = -20
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoid_v3.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoid_v3.py
@ -0,0 +1,200 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+# Camera attributes that ``HumanoidEnv.viewer_setup`` copies onto the viewer
+# camera; the array-valued entry is written in place, the others are assigned.
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": 1,
+    "distance": 4.0,
+    "lookat": np.array((0.0, 0.0, 2.0)),
+    "elevation": -20.0,
+}
+
+
+def mass_center(model, sim):
+    """Return a copy of the first two components of the mass-weighted mean of ``sim.data.xipos``."""
+    body_masses = np.expand_dims(model.body_mass, axis=1)
+    weighted_positions = body_masses * sim.data.xipos
+    center = np.sum(weighted_positions, axis=0) / np.sum(body_masses)
+    return center[0:2].copy()
+
+
+class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
+    """Humanoid environment on top of ``MuJocoPyEnv`` with configurable rewards.
+
+    The reward is ``forward_reward + healthy_reward - ctrl_cost - contact_cost``.
+    The environment is "healthy" while ``qpos[2]`` lies strictly inside
+    ``healthy_z_range``; when ``terminate_when_unhealthy`` is set, an unhealthy
+    state terminates the episode.
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 67,
+    }
+
+    def __init__(
+        self,
+        xml_file="humanoid.xml",
+        forward_reward_weight=1.25,
+        ctrl_cost_weight=0.1,
+        contact_cost_weight=5e-7,
+        contact_cost_range=(-np.inf, 10.0),
+        healthy_reward=5.0,
+        terminate_when_unhealthy=True,
+        healthy_z_range=(1.0, 2.0),
+        reset_noise_scale=1e-2,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        """Store the reward/termination settings and initialise the model.
+
+        Args:
+            xml_file: Model file passed to ``MuJocoPyEnv.__init__``.
+            forward_reward_weight: Multiplier applied to the x velocity.
+            ctrl_cost_weight: Multiplier of the quadratic control cost.
+            contact_cost_weight: Multiplier of the quadratic contact cost.
+            contact_cost_range: ``(min, max)`` used to clip the contact cost.
+            healthy_reward: Value of the healthy bonus.
+            terminate_when_unhealthy: Whether an unhealthy state terminates.
+            healthy_z_range: ``(min, max)`` bounds for ``qpos[2]`` (exclusive).
+            reset_noise_scale: Half-width of the uniform reset noise.
+            exclude_current_positions_from_observation: Whether to drop the
+                first two position entries from the observation.
+            **kwargs: Forwarded to ``EzPickle.__init__`` and
+                ``MuJocoPyEnv.__init__``.
+        """
+        # NOTE(review): presumably records the constructor arguments so the
+        # environment can be re-created when unpickled - confirm against
+        # gymnasium.utils.EzPickle.
+        utils.EzPickle.__init__(
+            self,
+            xml_file,
+            forward_reward_weight,
+            ctrl_cost_weight,
+            contact_cost_weight,
+            contact_cost_range,
+            healthy_reward,
+            terminate_when_unhealthy,
+            healthy_z_range,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        self._forward_reward_weight = forward_reward_weight
+        self._ctrl_cost_weight = ctrl_cost_weight
+        self._contact_cost_weight = contact_cost_weight
+        self._contact_cost_range = contact_cost_range
+        self._healthy_reward = healthy_reward
+        self._terminate_when_unhealthy = terminate_when_unhealthy
+        self._healthy_z_range = healthy_z_range
+
+        self._reset_noise_scale = reset_noise_scale
+
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+        # Two fewer observation entries when _get_obs drops qpos[0:2].
+        if exclude_current_positions_from_observation:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
+            )
+        else:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64
+            )
+
+        MuJocoPyEnv.__init__(
+            self, xml_file, 5, observation_space=observation_space, **kwargs
+        )
+
+    @property
+    def healthy_reward(self):
+        """Healthy bonus for the current state.
+
+        Equals ``_healthy_reward`` when the state is healthy or when
+        ``_terminate_when_unhealthy`` is set, and ``0.0`` otherwise.
+        """
+        return (
+            float(self.is_healthy or self._terminate_when_unhealthy)
+            * self._healthy_reward
+        )
+
+    def control_cost(self, action):
+        """Return the weighted sum of squared controls.
+
+        Note that ``action`` is not used: the cost is computed from
+        ``self.sim.data.ctrl``, i.e. the controls currently stored in the
+        simulator.
+        """
+        control_cost = self._ctrl_cost_weight * np.sum(np.square(self.sim.data.ctrl))
+        return control_cost
+
+    @property
+    def contact_cost(self):
+        """Weighted sum of squared ``cfrc_ext`` entries, clipped to ``_contact_cost_range``."""
+        contact_forces = self.sim.data.cfrc_ext
+        contact_cost = self._contact_cost_weight * np.sum(np.square(contact_forces))
+        min_cost, max_cost = self._contact_cost_range
+        contact_cost = np.clip(contact_cost, min_cost, max_cost)
+        return contact_cost
+
+    @property
+    def is_healthy(self):
+        """Whether ``qpos[2]`` lies strictly between the ``_healthy_z_range`` bounds."""
+        min_z, max_z = self._healthy_z_range
+        is_healthy = min_z < self.sim.data.qpos[2] < max_z
+
+        return is_healthy
+
+    @property
+    def terminated(self):
+        """True only if unhealthy termination is enabled and the state is unhealthy."""
+        terminated = (not self.is_healthy) if self._terminate_when_unhealthy else False
+        return terminated
+
+    def _get_obs(self):
+        """Concatenate copies of the simulator arrays that form one observation.
+
+        The order is: positions, velocities, ``cinert``, ``cvel``,
+        ``qfrc_actuator`` and ``cfrc_ext``. The first two position entries are
+        dropped when ``_exclude_current_positions_from_observation`` is set.
+        """
+        position = self.sim.data.qpos.flat.copy()
+        velocity = self.sim.data.qvel.flat.copy()
+
+        com_inertia = self.sim.data.cinert.flat.copy()
+        com_velocity = self.sim.data.cvel.flat.copy()
+
+        actuator_forces = self.sim.data.qfrc_actuator.flat.copy()
+        external_contact_forces = self.sim.data.cfrc_ext.flat.copy()
+
+        if self._exclude_current_positions_from_observation:
+            position = position[2:]
+
+        return np.concatenate(
+            (
+                position,
+                velocity,
+                com_inertia,
+                com_velocity,
+                actuator_forces,
+                external_contact_forces,
+            )
+        )
+
+    def step(self, action):
+        """Apply ``action`` for ``frame_skip`` simulation steps.
+
+        Returns ``(observation, reward, terminated, False, info)``; the fourth
+        element is always ``False`` in this implementation. ``info`` reports
+        the individual reward terms, the centre-of-mass xy position and
+        velocity, and the distance of that position from the origin.
+        """
+        # Velocity is the finite difference of the centre of mass across the step.
+        xy_position_before = mass_center(self.model, self.sim)
+        self.do_simulation(action, self.frame_skip)
+        xy_position_after = mass_center(self.model, self.sim)
+
+        xy_velocity = (xy_position_after - xy_position_before) / self.dt
+        x_velocity, y_velocity = xy_velocity
+
+        ctrl_cost = self.control_cost(action)
+        contact_cost = self.contact_cost
+
+        forward_reward = self._forward_reward_weight * x_velocity
+        healthy_reward = self.healthy_reward
+
+        rewards = forward_reward + healthy_reward
+        costs = ctrl_cost + contact_cost
+
+        observation = self._get_obs()
+        reward = rewards - costs
+        terminated = self.terminated
+        # "reward_linvel" and "forward_reward" carry the same value.
+        info = {
+            "reward_linvel": forward_reward,
+            "reward_quadctrl": -ctrl_cost,
+            "reward_alive": healthy_reward,
+            "reward_impact": -contact_cost,
+            "x_position": xy_position_after[0],
+            "y_position": xy_position_after[1],
+            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
+            "x_velocity": x_velocity,
+            "y_velocity": y_velocity,
+            "forward_reward": forward_reward,
+        }
+
+        if self.render_mode == "human":
+            self.render()
+        return observation, reward, terminated, False, info
+
+    def reset_model(self):
+        """Reset to the initial state plus uniform noise and return the observation.
+
+        Noise for each position and velocity entry is drawn from
+        ``[-_reset_noise_scale, _reset_noise_scale]``.
+        """
+        noise_low = -self._reset_noise_scale
+        noise_high = self._reset_noise_scale
+
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nq
+        )
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nv
+        )
+        self.set_state(qpos, qvel)
+
+        observation = self._get_obs()
+        return observation
+
+    def viewer_setup(self):
+        """Copy every entry of ``DEFAULT_CAMERA_CONFIG`` onto the viewer camera."""
+        assert self.viewer is not None
+        for key, value in DEFAULT_CAMERA_CONFIG.items():
+            # Array-valued settings are written in place; scalars are assigned.
+            if isinstance(value, np.ndarray):
+                getattr(self.viewer.cam, key)[:] = value
+            else:
+                setattr(self.viewer.cam, key, value)
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoid_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoid_v4.py
@ -0,0 +1,422 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+# Camera settings passed to MujocoEnv as `default_camera_config`.
+DEFAULT_CAMERA_CONFIG = dict(
+    trackbodyid=1,
+    distance=4.0,
+    lookat=np.array([0.0, 0.0, 2.0]),
+    elevation=-20.0,
+)
+
+
+def mass_center(model, data):
+    """Return the first two components of the mass-weighted mean of `data.xipos`.
+
+    `model.body_mass` supplies one weight per row of `data.xipos`. The result is
+    returned as a fresh array, not a view.
+    """
+    masses = np.expand_dims(model.body_mass, 1)
+    weighted_sum = (masses * data.xipos).sum(axis=0)
+    center = weighted_sum / masses.sum()
+    return center[:2].copy()
+
+
+class HumanoidEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+
+    This environment is based on the environment introduced by Tassa, Erez and Todorov
+    in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025).
+    The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a pair of
+    legs and arms. The legs each consist of three body parts, and the arms 2 body parts (representing the knees and
+    elbows respectively). The goal of the environment is to walk forward as fast as possible without falling over.
+
+    ## Action Space
+    The action space is a `Box(-1, 1, (17,), float32)`. An action represents the torques applied at the hinge joints.
+
+    | Num | Action                    | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+    |-----|----------------------|---------------|----------------|---------------------------------------|-------|------|
+    | 0   | Torque applied on the hinge in the y-coordinate of the abdomen                     | -0.4 | 0.4 | abdomen_y                   | hinge | torque (N m) |
+    | 1   | Torque applied on the hinge in the z-coordinate of the abdomen                     | -0.4 | 0.4 | abdomen_z                   | hinge | torque (N m) |
+    | 2   | Torque applied on the hinge in the x-coordinate of the abdomen                     | -0.4 | 0.4 | abdomen_x                   | hinge | torque (N m) |
+    | 3   | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh)   | hinge | torque (N m) |
+    | 4   | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh)   | hinge | torque (N m) |
+    | 5   | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh)   | hinge | torque (N m) |
+    | 6   | Torque applied on the rotor between the right hip/thigh and the right shin         | -0.4 | 0.4 | right_knee                  | hinge | torque (N m) |
+    | 7   | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate)  | -0.4 | 0.4 | left_hip_x (left_thigh)     | hinge | torque (N m) |
+    | 8   | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate)  | -0.4 | 0.4 | left_hip_z (left_thigh)     | hinge | torque (N m) |
+    | 9   | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate)  | -0.4 | 0.4 | left_hip_y (left_thigh)     | hinge | torque (N m) |
+    | 10  | Torque applied on the rotor between the left hip/thigh and the left shin           | -0.4 | 0.4 | left_knee                   | hinge | torque (N m) |
+    | 11  | Torque applied on the rotor between the torso and right upper arm (coordinate -1)  | -0.4 | 0.4 | right_shoulder1             | hinge | torque (N m) |
+    | 12  | Torque applied on the rotor between the torso and right upper arm (coordinate -2)  | -0.4 | 0.4 | right_shoulder2             | hinge | torque (N m) |
+    | 13  | Torque applied on the rotor between the right upper arm and right lower arm        | -0.4 | 0.4 | right_elbow                 | hinge | torque (N m) |
+    | 14  | Torque applied on the rotor between the torso and left upper arm (coordinate -1)   | -0.4 | 0.4 | left_shoulder1              | hinge | torque (N m) |
+    | 15  | Torque applied on the rotor between the torso and left upper arm (coordinate -2)   | -0.4 | 0.4 | left_shoulder2              | hinge | torque (N m) |
+    | 16  | Torque applied on the rotor between the left upper arm and left lower arm          | -0.4 | 0.4 | left_elbow                  | hinge | torque (N m) |
+
+    ## Observation Space
+    Observations consist of positional values of different body parts of the Humanoid,
+    followed by the velocities of those individual parts (their derivatives) with all the
+    positions ordered before all the velocities.
+
+    By default, observations do not include the x- and y-coordinates of the torso. These may
+    be included by passing `exclude_current_positions_from_observation=False` during construction.
+    In that case, the observation space will be a `Box(-Inf, Inf, (378,), float64)` where the first two observations
+    represent the x- and y-coordinates of the torso.
+    Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
+    will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
+
+    However, by default, the observation is a `Box(-Inf, Inf, (376,), float64)`. The elements correspond to the following:
+
+    | Num | Observation                                                                                                     | Min  | Max | Name (in corresponding XML file) | Joint | Unit                       |
+    | --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- |
+    | 0   | z-coordinate of the torso (centre)                                                                              | -Inf | Inf | root                             | free  | position (m)               |
+    | 1   | x-orientation of the torso (centre)                                                                             | -Inf | Inf | root                             | free  | angle (rad)                |
+    | 2   | y-orientation of the torso (centre)                                                                             | -Inf | Inf | root                             | free  | angle (rad)                |
+    | 3   | z-orientation of the torso (centre)                                                                             | -Inf | Inf | root                             | free  | angle (rad)                |
+    | 4   | w-orientation of the torso (centre)                                                                             | -Inf | Inf | root                             | free  | angle (rad)                |
+    | 5   | z-angle of the abdomen (in lower_waist)                                                                         | -Inf | Inf | abdomen_z                        | hinge | angle (rad)                |
+    | 6   | y-angle of the abdomen (in lower_waist)                                                                         | -Inf | Inf | abdomen_y                        | hinge | angle (rad)                |
+    | 7   | x-angle of the abdomen (in pelvis)                                                                              | -Inf | Inf | abdomen_x                        | hinge | angle (rad)                |
+    | 8   | x-coordinate of angle between pelvis and right hip (in right_thigh)                                             | -Inf | Inf | right_hip_x                      | hinge | angle (rad)                |
+    | 9   | z-coordinate of angle between pelvis and right hip (in right_thigh)                                             | -Inf | Inf | right_hip_z                      | hinge | angle (rad)                |
+    | 10  | y-coordinate of angle between pelvis and right hip (in right_thigh)                                             | -Inf | Inf | right_hip_y                      | hinge | angle (rad)                |
+    | 11  | angle between right hip and the right shin (in right_knee)                                                      | -Inf | Inf | right_knee                       | hinge | angle (rad)                |
+    | 12  | x-coordinate of angle between pelvis and left hip (in left_thigh)                                               | -Inf | Inf | left_hip_x                       | hinge | angle (rad)                |
+    | 13  | z-coordinate of angle between pelvis and left hip (in left_thigh)                                               | -Inf | Inf | left_hip_z                       | hinge | angle (rad)                |
+    | 14  | y-coordinate of angle between pelvis and left hip (in left_thigh)                                               | -Inf | Inf | left_hip_y                       | hinge | angle (rad)                |
+    | 15  | angle between left hip and the left shin (in left_knee)                                                         | -Inf | Inf | left_knee                        | hinge | angle (rad)                |
+    | 16  | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm)                                | -Inf | Inf | right_shoulder1                  | hinge | angle (rad)                |
+    | 17  | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm)                                | -Inf | Inf | right_shoulder2                  | hinge | angle (rad)                |
+    | 18  | angle between right upper arm and right_lower_arm                                                               | -Inf | Inf | right_elbow                      | hinge | angle (rad)                |
+    | 19  | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm)                                  | -Inf | Inf | left_shoulder1                   | hinge | angle (rad)                |
+    | 20  | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm)                                  | -Inf | Inf | left_shoulder2                   | hinge | angle (rad)                |
+    | 21  | angle between left upper arm and left_lower_arm                                                                 | -Inf | Inf | left_elbow                       | hinge | angle (rad)                |
+    | 22  | x-coordinate velocity of the torso (centre)                                                                     | -Inf | Inf | root                             | free  | velocity (m/s)             |
+    | 23  | y-coordinate velocity of the torso (centre)                                                                     | -Inf | Inf | root                             | free  | velocity (m/s)             |
+    | 24  | z-coordinate velocity of the torso (centre)                                                                     | -Inf | Inf | root                             | free  | velocity (m/s)             |
+    | 25  | x-coordinate angular velocity of the torso (centre)                                                             | -Inf | Inf | root                             | free  | angular velocity (rad/s)   |
+    | 26  | y-coordinate angular velocity of the torso (centre)                                                             | -Inf | Inf | root                             | free  | angular velocity (rad/s)   |
+    | 27  | z-coordinate angular velocity of the torso (centre)                                                             | -Inf | Inf | root                             | free  | angular velocity (rad/s)   |
+    | 28  | z-coordinate of angular velocity of the abdomen (in lower_waist)                                                | -Inf | Inf | abdomen_z                        | hinge | angular velocity (rad/s)   |
+    | 29  | y-coordinate of angular velocity of the abdomen (in lower_waist)                                                | -Inf | Inf | abdomen_y                        | hinge | angular velocity (rad/s)   |
+    | 30  | x-coordinate of angular velocity of the abdomen (in pelvis)                                                     | -Inf | Inf | abdomen_x                        | hinge | angular velocity (rad/s)   |
+    | 31  | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh)                 | -Inf | Inf | right_hip_x                      | hinge | angular velocity (rad/s)   |
+    | 32  | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh)                 | -Inf | Inf | right_hip_z                      | hinge | angular velocity (rad/s)   |
+    | 33  | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh)                 | -Inf | Inf | right_hip_y                      | hinge | angular velocity (rad/s)   |
+    | 34  | angular velocity of the angle between right hip and the right shin (in right_knee)                              | -Inf | Inf | right_knee                       | hinge | angular velocity (rad/s)   |
+    | 35  | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh)                   | -Inf | Inf | left_hip_x                       | hinge | angular velocity (rad/s)   |
+    | 36  | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh)                   | -Inf | Inf | left_hip_z                       | hinge | angular velocity (rad/s)   |
+    | 37  | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh)                   | -Inf | Inf | left_hip_y                       | hinge | angular velocity (rad/s)   |
+    | 38  | angular velocity of the angle between left hip and the left shin (in left_knee)                                 | -Inf | Inf | left_knee                        | hinge | angular velocity (rad/s)   |
+    | 39  | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1                  | hinge | angular velocity (rad/s)   |
+    | 40  | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2                  | hinge | angular velocity (rad/s)   |
+    | 41  | angular velocity of the angle between right upper arm and right_lower_arm                                       | -Inf | Inf | right_elbow                      | hinge | angular velocity (rad/s)   |
+    | 42  | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm)   | -Inf | Inf | left_shoulder1                   | hinge | angular velocity (rad/s)   |
+    | 43  | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm)   | -Inf | Inf | left_shoulder2                   | hinge | angular velocity (rad/s)   |
+    | 44  | angular velocity of the angle between left upper arm and left_lower_arm                                         | -Inf | Inf | left_elbow                       | hinge | angular velocity (rad/s)   |
+    | excluded | x-coordinate of the torso (centre)                                                                         | -Inf | Inf | root                             | free  | position (m)               |
+    | excluded | y-coordinate of the torso (centre)                                                                         | -Inf | Inf | root                             | free  | position (m)               |
+
+    Additionally, after all the positional and velocity based values in the table,
+    the observation contains (in order):
+    - *cinert:* Mass and inertia of a single rigid body relative to the center of mass
+    (this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*)
+    and hence adds to another 140 elements in the state space.
+    - *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence
+    adds another 84 elements in the state space
+    - *qfrc_actuator:* Constraint force generated as the actuator force. This has shape
+    `(23,)`  *(nv * 1)* and hence adds another 23 elements to the state space.
+    - *cfrc_ext:* This is the center of mass based external force on the body.  It has shape
+    14 * 6 (*nbody * 6*) and hence adds to another 84 elements in the state space.
+    where *nbody* stands for the number of bodies in the robot and *nv* stands for the
+    number of degrees of freedom (*= dim(qvel)*)
+
+    The body parts are:
+
+    | id (for `v2`,`v3`,`v4`) | body part |
+    | --- |  ------------  |
+    | 0   | worldBody (note: all values are constant 0) |
+    | 1   | torso |
+    | 2   | lwaist |
+    | 3   | pelvis |
+    | 4   | right_thigh |
+    | 5   | right_shin |
+    | 6   | right_foot |
+    | 7   | left_thigh |
+    | 8   | left_shin |
+    | 9   | left_foot |
+    | 10  | right_upper_arm |
+    | 11  | right_lower_arm |
+    | 12  | left_upper_arm |
+    | 13  | left_lower_arm |
+
+    The joints are:
+
+    | id (for `v2`,`v3`,`v4`) | joint |
+    | --- |  ------------  |
+    | 0   | root |
+    | 1   | root |
+    | 2   | root |
+    | 3   | root |
+    | 4   | root |
+    | 5   | root |
+    | 6   | abdomen_z |
+    | 7   | abdomen_y |
+    | 8   | abdomen_x |
+    | 9   | right_hip_x |
+    | 10  | right_hip_z |
+    | 11  | right_hip_y |
+    | 12  | right_knee |
+    | 13  | left_hip_x |
+    | 14  | left_hip_z |
+    | 15  | left_hip_y |
+    | 16  | left_knee |
+    | 17  | right_shoulder1 |
+    | 18  | right_shoulder2 |
+    | 19  | right_elbow|
+    | 20  | left_shoulder1 |
+    | 21  | left_shoulder2 |
+    | 22  | left_elbow |
+
+    The (x,y,z) coordinates are translational DOFs while the orientations are rotational
+    DOFs expressed as quaternions. One can read more about free joints on the
+    [Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
+
+    **Note:** Humanoid-v4 environment no longer has the following contact forces issue.
+    If using Humanoid versions prior to v4, there have been reported issues that using a Mujoco-Py version > 2.0
+    results in the contact forces always being 0. As such we recommend to use a Mujoco-Py
+    version < 2.0 when using the Humanoid environment if you would like to report results
+    with contact forces (if contact forces are not used in your experiments, you can use
+    version > 2.0).
+
+    ## Rewards
+    The reward consists of three parts:
+    - *healthy_reward*: Every timestep that the humanoid is alive (see section Episode Termination for definition), it gets a reward of fixed value `healthy_reward`
+    - *forward_reward*: A reward of walking forward which is measured as *`forward_reward_weight` *
+    (average center of mass after action - average center of mass before action)/dt*.
+    *dt* is the time between actions and is dependent on the frame_skip parameter
+    (default is 5), where the frametime is 0.003 - making the default *dt = 5 * 0.003 = 0.015*.
+    This reward would be positive if the humanoid walks forward (in positive x-direction). The calculation
+    for the center of mass is defined in the `.py` file for the Humanoid.
+    - *ctrl_cost*: A negative reward for penalising the humanoid if it has too
+    large of a control force. If there are *nu* actuators/controls, then the control has
+    shape  `nu x 1`. It is measured as *`ctrl_cost_weight` * sum(control<sup>2</sup>)*.
+    - *contact_cost*: A negative reward for penalising the humanoid if the external
+    contact force is too large. It is calculated by clipping
+    *`contact_cost_weight` * sum(external contact force<sup>2</sup>)* to the interval specified by `contact_cost_range`.
+
+    The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost* and `info` will also contain the individual reward terms
+
+    ## Starting State
+    All observations start in state
+    (0.0, 0.0,  1.4, 1.0, 0.0  ... 0.0) with a uniform noise in the range
+    of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional and velocity values (values in the table)
+    for stochasticity. Note that the initial z coordinate is intentionally
+    selected to be high, thereby indicating a standing up humanoid. The initial
+    orientation is designed to make it face forward as well.
+
+    ## Episode End
+    The humanoid is said to be unhealthy if the z-position of the torso is no longer contained in the
+    closed interval specified by the argument `healthy_z_range`.
+
+    If `terminate_when_unhealthy=True` is passed during construction (which is the default),
+    the episode ends when any of the following happens:
+
+    1. Truncation: The episode duration reaches 1000 timesteps
+    2. Termination: The humanoid is unhealthy
+
+    If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
+
+    ## Arguments
+
+    No additional arguments are currently supported in v2 and lower.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Humanoid-v4')
+    ```
+
+    v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Humanoid-v4', ctrl_cost_weight=0.1, ....)
+    ```
+
+    | Parameter                                    | Type      | Default          | Description                                                                                                                                                               |
+    | -------------------------------------------- | --------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+    | `xml_file`                                   | **str**   | `"humanoid.xml"` | Path to a MuJoCo model                                                                                                                                                    |
+    | `forward_reward_weight`                      | **float** | `1.25`           | Weight for _forward_reward_ term (see section on reward)                                                                                                                  |
+    | `ctrl_cost_weight`                           | **float** | `0.1`            | Weight for _ctrl_cost_ term (see section on reward)                                                                                                                       |
+    | `contact_cost_weight`                        | **float** | `5e-7`           | Weight for _contact_cost_ term (see section on reward)                                                                                                                    |
+    | `healthy_reward`                             | **float** | `5.0`            | Constant reward given if the humanoid is "healthy" after timestep                                                                                                         |
+    | `terminate_when_unhealthy`                   | **bool**  | `True`           | If true, issue a done signal if the z-coordinate of the torso is no longer in the `healthy_z_range`                                                                       |
+    | `healthy_z_range`                            | **tuple** | `(1.0, 2.0)`     | The humanoid is considered healthy if the z-coordinate of the torso is in this range                                                                                      |
+    | `reset_noise_scale`                          | **float** | `1e-2`           | Scale of random perturbations of initial position and velocity (see section on Starting State)                                                                            |
+    | `exclude_current_positions_from_observation` | **bool**  | `True`           | Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+    * v0: Initial versions release (1.0.0)
+    """
+
+    # Rendering metadata: the supported render modes and the nominal frame rate.
+    # NOTE(review): 67 presumably approximates 1 / dt for the default dt = 0.015
+    # given in the class docstring — confirm against MujocoEnv.
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 67,
+    }
+
+    def __init__(
+        self,
+        forward_reward_weight=1.25,
+        ctrl_cost_weight=0.1,
+        healthy_reward=5.0,
+        terminate_when_unhealthy=True,
+        healthy_z_range=(1.0, 2.0),
+        reset_noise_scale=1e-2,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        """Store the reward/termination settings and initialise the MuJoCo environment.
+
+        Args:
+            forward_reward_weight: Multiplier applied to the x-velocity in `step`.
+            ctrl_cost_weight: Multiplier applied to the squared controls in
+                `control_cost`.
+            healthy_reward: Value scaled by the health flag in the `healthy_reward`
+                property.
+            terminate_when_unhealthy: Whether `terminated` reports an unhealthy state.
+            healthy_z_range: `(min_z, max_z)` bounds checked by `is_healthy`.
+            reset_noise_scale: Half-width of the uniform noise used by `reset_model`.
+            exclude_current_positions_from_observation: If true, `_get_obs` drops the
+                first two position entries and the observation has 376 elements
+                instead of 378.
+            **kwargs: Forwarded to both `EzPickle.__init__` and `MujocoEnv.__init__`.
+        """
+        # Register the constructor arguments with EzPickle.
+        utils.EzPickle.__init__(
+            self,
+            forward_reward_weight,
+            ctrl_cost_weight,
+            healthy_reward,
+            terminate_when_unhealthy,
+            healthy_z_range,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        # Reward weights.
+        self._forward_reward_weight = forward_reward_weight
+        self._ctrl_cost_weight = ctrl_cost_weight
+        self._healthy_reward = healthy_reward
+
+        # Health / termination settings.
+        self._terminate_when_unhealthy = terminate_when_unhealthy
+        self._healthy_z_range = healthy_z_range
+
+        # Reset and observation settings.
+        self._reset_noise_scale = reset_noise_scale
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        # Two extra entries are present when the x/y positions are kept.
+        obs_size = 376 if exclude_current_positions_from_observation else 378
+        observation_space = Box(
+            low=-np.inf, high=np.inf, shape=(obs_size,), dtype=np.float64
+        )
+
+        MujocoEnv.__init__(
+            self,
+            "humanoid.xml",
+            5,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+
+    @property
+    def healthy_reward(self):
+        """Return `_healthy_reward` scaled by the health flag.
+
+        The flag is 1.0 when the humanoid is healthy or when unhealthy states
+        terminate the episode, and 0.0 otherwise.
+        """
+        flag = self.is_healthy or self._terminate_when_unhealthy
+        scale = float(flag)
+        return scale * self._healthy_reward
+
+    def control_cost(self, action):
+        """Return `_ctrl_cost_weight` times the sum of squared entries of `self.data.ctrl`.
+
+        `action` is not read here; the cost is computed from the controls
+        currently stored in the simulation data.
+        """
+        squared_controls = np.square(self.data.ctrl)
+        return self._ctrl_cost_weight * np.sum(squared_controls)
+
+    @property
+    def is_healthy(self):
+        """Whether `qpos[2]` lies strictly between the bounds of `_healthy_z_range`."""
+        lower, upper = self._healthy_z_range
+        return lower < self.data.qpos[2] < upper
+
+    @property
+    def terminated(self):
+        """Whether the episode should terminate.
+
+        Always False when `_terminate_when_unhealthy` is off; otherwise the
+        negation of `is_healthy`.
+        """
+        if not self._terminate_when_unhealthy:
+            return False
+        return not self.is_healthy
+
+    def _get_obs(self):
+        """Assemble the flat observation vector from the current simulation data.
+
+        The result concatenates, in order: qpos (without its first two entries when
+        positions are excluded), qvel, cinert, cvel, qfrc_actuator and cfrc_ext.
+        """
+        sim_data = self.data
+
+        qpos = sim_data.qpos.flat.copy()
+        qvel = sim_data.qvel.flat.copy()
+        segments = [
+            qpos,
+            qvel,
+            sim_data.cinert.flat.copy(),
+            sim_data.cvel.flat.copy(),
+            sim_data.qfrc_actuator.flat.copy(),
+            sim_data.cfrc_ext.flat.copy(),
+        ]
+
+        if self._exclude_current_positions_from_observation:
+            # Drop the two leading position entries.
+            segments[0] = qpos[2:]
+
+        return np.concatenate(segments)
+
+    def step(self, action):
+        """Advance the simulation by one environment step.
+
+        The reward is `forward_reward + healthy_reward - ctrl_cost`, where the
+        forward reward is the weighted x-velocity of the center of mass.
+
+        Returns:
+            A 5-tuple `(observation, reward, terminated, False, info)`. `info`
+            holds the individual reward terms plus the center-of-mass position,
+            its distance from the origin and its velocity.
+        """
+        com_before = mass_center(self.model, self.data)
+        self.do_simulation(action, self.frame_skip)
+        com_after = mass_center(self.model, self.data)
+
+        # Finite-difference velocity of the center of mass over this step.
+        x_velocity, y_velocity = (com_after - com_before) / self.dt
+
+        ctrl_cost = self.control_cost(action)
+        forward_reward = self._forward_reward_weight * x_velocity
+        healthy_reward = self.healthy_reward
+
+        observation = self._get_obs()
+        reward = (forward_reward + healthy_reward) - ctrl_cost
+        terminated = self.terminated
+
+        x_position, y_position = com_after[0], com_after[1]
+        info = {
+            "reward_linvel": forward_reward,
+            "reward_quadctrl": -ctrl_cost,
+            "reward_alive": healthy_reward,
+            "x_position": x_position,
+            "y_position": y_position,
+            "distance_from_origin": np.linalg.norm(com_after, ord=2),
+            "x_velocity": x_velocity,
+            "y_velocity": y_velocity,
+            "forward_reward": forward_reward,
+        }
+
+        if self.render_mode == "human":
+            self.render()
+        return observation, reward, terminated, False, info
+
+    def reset_model(self):
+        """Reset the simulation to a perturbed initial state and return its observation.
+
+        Uniform noise in [-reset_noise_scale, reset_noise_scale] is added to the
+        initial positions (`nq` entries) and initial velocities (`nv` entries).
+        """
+        scale = self._reset_noise_scale
+
+        # Position noise is drawn before velocity noise so the random stream is
+        # consumed in a fixed order.
+        position_noise = self.np_random.uniform(
+            low=-scale, high=scale, size=self.model.nq
+        )
+        velocity_noise = self.np_random.uniform(
+            low=-scale, high=scale, size=self.model.nv
+        )
+
+        self.set_state(self.init_qpos + position_noise, self.init_qvel + velocity_noise)
+        return self._get_obs()
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoidstandup.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoidstandup.py
@ -0,0 +1,87 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
+    """Humanoid stand-up environment built on `MuJocoPyEnv`.
+
+    The reward computed in `step` grows with `qpos[2]` and is reduced by a
+    quadratic control cost and a capped quadratic external-force cost.
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array", "depth_array"],
+        "render_fps": 67,
+    }
+
+    def __init__(self, **kwargs):
+        """Create the environment; `kwargs` go to `MuJocoPyEnv` and `EzPickle`."""
+        obs_space = Box(low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64)
+        MuJocoPyEnv.__init__(
+            self,
+            "humanoidstandup.xml",
+            5,
+            observation_space=obs_space,
+            **kwargs,
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def _get_obs(self):
+        """Concatenate qpos (minus its first two entries), qvel, cinert, cvel,
+        qfrc_actuator and cfrc_ext into one flat observation vector."""
+        sim_data = self.sim.data
+        segments = [
+            sim_data.qpos.flat[2:],
+            sim_data.qvel.flat,
+            sim_data.cinert.flat,
+            sim_data.cvel.flat,
+            sim_data.qfrc_actuator.flat,
+            sim_data.cfrc_ext.flat,
+        ]
+        return np.concatenate(segments)
+
+    def step(self, a):
+        """Simulate one step and return `(obs, reward, False, False, info)`."""
+        self.do_simulation(a, self.frame_skip)
+        height = self.sim.data.qpos[2]
+        sim_data = self.sim.data
+
+        # Upward term: qpos[2] divided by the simulation timestep.
+        uph_cost = (height - 0) / self.model.opt.timestep
+        quad_ctrl_cost = 0.1 * np.square(sim_data.ctrl).sum()
+        # The external-force penalty is capped at 10.
+        quad_impact_cost = min(0.5e-6 * np.square(sim_data.cfrc_ext).sum(), 10)
+        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
+
+        if self.render_mode == "human":
+            self.render()
+
+        info = {
+            "reward_linup": uph_cost,
+            "reward_quadctrl": -quad_ctrl_cost,
+            "reward_impact": -quad_impact_cost,
+        }
+        return self._get_obs(), reward, False, False, info
+
+    def reset_model(self):
+        """Reset to the initial state plus uniform noise in [-0.01, 0.01]."""
+        c = 0.01
+        position_noise = self.np_random.uniform(low=-c, high=c, size=self.model.nq)
+        velocity_noise = self.np_random.uniform(low=-c, high=c, size=self.model.nv)
+        self.set_state(self.init_qpos + position_noise, self.init_qvel + velocity_noise)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        """Configure the viewer camera (tracked body, distance, look-at z, elevation)."""
+        assert self.viewer is not None
+        cam = self.viewer.cam
+        cam.trackbodyid = 1
+        cam.distance = self.model.stat.extent * 1.0
+        cam.lookat[2] = 0.8925
+        cam.elevation = -20
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoidstandup_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/humanoidstandup_v4.py
@ -0,0 +1,319 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+# Camera defaults forwarded to ``MujocoEnv.__init__`` as ``default_camera_config``
+# by ``HumanoidStandupEnv.__init__`` below.
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": 1,
+    "distance": 4.0,
+    "lookat": np.array((0.0, 0.0, 0.8925)),
+    "elevation": -20.0,
+}
+
+
+class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+
+    This environment is based on the environment introduced by Tassa, Erez and Todorov
+    in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025).
+    The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a
+    pair of legs and arms. The legs each consist of two links, and so do the arms (representing the
+    knees and elbows respectively). The environment starts with the humanoid lying on the ground,
+    and then the goal of the environment is to make the humanoid stand up and then keep it standing
+    by applying torques on the various hinges.
+
+    ## Action Space
+    The agent takes a 17-element vector for actions.
+
+    The action space is a continuous `(action, ...)` all in `[-0.4, 0.4]` (the control limits listed in the table below), where `action`
+    represents the numerical torques applied at the hinge joints.
+
+    | Num | Action                                                                             | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit         |
+    | --- | ---------------------------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
+    | 0   | Torque applied on the hinge in the y-coordinate of the abdomen                     | -0.4        | 0.4         | abdomen_y                        | hinge | torque (N m) |
+    | 1   | Torque applied on the hinge in the z-coordinate of the abdomen                     | -0.4        | 0.4         | abdomen_z                        | hinge | torque (N m) |
+    | 2   | Torque applied on the hinge in the x-coordinate of the abdomen                     | -0.4        | 0.4         | abdomen_x                        | hinge | torque (N m) |
+    | 3   | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4        | 0.4         | right_hip_x (right_thigh)        | hinge | torque (N m) |
+    | 4   | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4        | 0.4         | right_hip_z (right_thigh)        | hinge | torque (N m) |
+    | 5   | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4        | 0.4         | right_hip_y (right_thigh)        | hinge | torque (N m) |
+    | 6   | Torque applied on the rotor between the right hip/thigh and the right shin         | -0.4        | 0.4         | right_knee                       | hinge | torque (N m) |
+    | 7   | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate)  | -0.4        | 0.4         | left_hip_x (left_thigh)          | hinge | torque (N m) |
+    | 8   | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate)  | -0.4        | 0.4         | left_hip_z (left_thigh)          | hinge | torque (N m) |
+    | 9   | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate)  | -0.4        | 0.4         | left_hip_y (left_thigh)          | hinge | torque (N m) |
+    | 10  | Torque applied on the rotor between the left hip/thigh and the left shin           | -0.4        | 0.4         | left_knee                        | hinge | torque (N m) |
+    | 11  | Torque applied on the rotor between the torso and right upper arm (coordinate -1)  | -0.4        | 0.4         | right_shoulder1                  | hinge | torque (N m) |
+    | 12  | Torque applied on the rotor between the torso and right upper arm (coordinate -2)  | -0.4        | 0.4         | right_shoulder2                  | hinge | torque (N m) |
+    | 13  | Torque applied on the rotor between the right upper arm and right lower arm        | -0.4        | 0.4         | right_elbow                      | hinge | torque (N m) |
+    | 14  | Torque applied on the rotor between the torso and left upper arm (coordinate -1)   | -0.4        | 0.4         | left_shoulder1                   | hinge | torque (N m) |
+    | 15  | Torque applied on the rotor between the torso and left upper arm (coordinate -2)   | -0.4        | 0.4         | left_shoulder2                   | hinge | torque (N m) |
+    | 16  | Torque applied on the rotor between the left upper arm and left lower arm          | -0.4        | 0.4         | left_elbow                       | hinge | torque (N m) |
+
+    ## Observation Space
+    Observations consist of positional values of different body parts of the Humanoid,
+    followed by the velocities of those individual parts (their derivatives) with all the
+    positions ordered before all the velocities.
+
+    The observations never include the x- and y-coordinates of the torso: `_get_obs` always drops the
+    first two entries of `qpos`, and this version has no `exclude_current_positions_from_observation`
+    option to change that (see the Arguments section below).
+    These coordinates are not reported in `info` either; `info` only carries the reward terms
+    `"reward_linup"`, `"reward_quadctrl"` and `"reward_impact"`.
+
+    The observation is a `Box(-Inf, Inf, (376,), float64)`. The elements correspond to the following:
+
+    | Num | Observation                                                                                                     | Min  | Max | Name (in corresponding XML file) | Joint | Unit                       |
+    | --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- |
+    | 0   | z-coordinate of the torso (centre)                                                                              | -Inf | Inf | root                             | free  | position (m)               |
+    | 1   | x-orientation of the torso (centre)                                                                             | -Inf | Inf | root                             | free  | angle (rad)                |
+    | 2   | y-orientation of the torso (centre)                                                                             | -Inf | Inf | root                             | free  | angle (rad)                |
+    | 3   | z-orientation of the torso (centre)                                                                             | -Inf | Inf | root                             | free  | angle (rad)                |
+    | 4   | w-orientation of the torso (centre)                                                                             | -Inf | Inf | root                             | free  | angle (rad)                |
+    | 5   | z-angle of the abdomen (in lower_waist)                                                                         | -Inf | Inf | abdomen_z                        | hinge | angle (rad)                |
+    | 6   | y-angle of the abdomen (in lower_waist)                                                                         | -Inf | Inf | abdomen_y                        | hinge | angle (rad)                |
+    | 7   | x-angle of the abdomen (in pelvis)                                                                              | -Inf | Inf | abdomen_x                        | hinge | angle (rad)                |
+    | 8   | x-coordinate of angle between pelvis and right hip (in right_thigh)                                             | -Inf | Inf | right_hip_x                      | hinge | angle (rad)                |
+    | 9   | z-coordinate of angle between pelvis and right hip (in right_thigh)                                             | -Inf | Inf | right_hip_z                      | hinge | angle (rad)                |
+    | 10  | y-coordinate of angle between pelvis and right hip (in right_thigh)                                             | -Inf | Inf | right_hip_y                      | hinge | angle (rad)                |
+    | 11  | angle between right hip and the right shin (in right_knee)                                                      | -Inf | Inf | right_knee                       | hinge | angle (rad)                |
+    | 12  | x-coordinate of angle between pelvis and left hip (in left_thigh)                                               | -Inf | Inf | left_hip_x                       | hinge | angle (rad)                |
+    | 13  | z-coordinate of angle between pelvis and left hip (in left_thigh)                                               | -Inf | Inf | left_hip_z                       | hinge | angle (rad)                |
+    | 14  | y-coordinate of angle between pelvis and left hip (in left_thigh)                                               | -Inf | Inf | left_hip_y                       | hinge | angle (rad)                |
+    | 15  | angle between left hip and the left shin (in left_knee)                                                         | -Inf | Inf | left_knee                        | hinge | angle (rad)                |
+    | 16  | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm)                                | -Inf | Inf | right_shoulder1                  | hinge | angle (rad)                |
+    | 17  | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm)                                | -Inf | Inf | right_shoulder2                  | hinge | angle (rad)                |
+    | 18  | angle between right upper arm and right_lower_arm                                                               | -Inf | Inf | right_elbow                      | hinge | angle (rad)                |
+    | 19  | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm)                                  | -Inf | Inf | left_shoulder1                   | hinge | angle (rad)                |
+    | 20  | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm)                                  | -Inf | Inf | left_shoulder2                   | hinge | angle (rad)                |
+    | 21  | angle between left upper arm and left_lower_arm                                                                 | -Inf | Inf | left_elbow                       | hinge | angle (rad)                |
+    | 22  | x-coordinate velocity of the torso (centre)                                                                     | -Inf | Inf | root                             | free  | velocity (m/s)             |
+    | 23  | y-coordinate velocity of the torso (centre)                                                                     | -Inf | Inf | root                             | free  | velocity (m/s)             |
+    | 24  | z-coordinate velocity of the torso (centre)                                                                     | -Inf | Inf | root                             | free  | velocity (m/s)             |
+    | 25  | x-coordinate angular velocity of the torso (centre)                                                             | -Inf | Inf | root                             | free  | angular velocity (rad/s)   |
+    | 26  | y-coordinate angular velocity of the torso (centre)                                                             | -Inf | Inf | root                             | free  | angular velocity (rad/s)   |
+    | 27  | z-coordinate angular velocity of the torso (centre)                                                             | -Inf | Inf | root                             | free  | angular velocity (rad/s)   |
+    | 28  | z-coordinate of angular velocity of the abdomen (in lower_waist)                                                | -Inf | Inf | abdomen_z                        | hinge | angular velocity (rad/s)   |
+    | 29  | y-coordinate of angular velocity of the abdomen (in lower_waist)                                                | -Inf | Inf | abdomen_y                        | hinge | angular velocity (rad/s)   |
+    | 30  | x-coordinate of angular velocity of the abdomen (in pelvis)                                                     | -Inf | Inf | abdomen_x                        | hinge | angular velocity (rad/s)   |
+    | 31  | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh)                 | -Inf | Inf | right_hip_x                      | hinge | angular velocity (rad/s)   |
+    | 32  | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh)                 | -Inf | Inf | right_hip_z                      | hinge | angular velocity (rad/s)   |
+    | 33  | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh)                 | -Inf | Inf | right_hip_y                      | hinge | angular velocity (rad/s)   |
+    | 34  | angular velocity of the angle between right hip and the right shin (in right_knee)                              | -Inf | Inf | right_knee                       | hinge | angular velocity (rad/s)   |
+    | 35  | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh)                   | -Inf | Inf | left_hip_x                       | hinge | angular velocity (rad/s)   |
+    | 36  | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh)                   | -Inf | Inf | left_hip_z                       | hinge | angular velocity (rad/s)   |
+    | 37  | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh)                   | -Inf | Inf | left_hip_y                       | hinge | angular velocity (rad/s)   |
+    | 38  | angular velocity of the angle between left hip and the left shin (in left_knee)                                 | -Inf | Inf | left_knee                        | hinge | angular velocity (rad/s)   |
+    | 39  | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1                  | hinge | angular velocity (rad/s)   |
+    | 40  | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2                  | hinge | angular velocity (rad/s)   |
+    | 41  | angular velocity of the angle between right upper arm and right_lower_arm                                       | -Inf | Inf | right_elbow                      | hinge | angular velocity (rad/s)   |
+    | 42  | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm)   | -Inf | Inf | left_shoulder1                   | hinge | angular velocity (rad/s)   |
+    | 43  | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm)   | -Inf | Inf | left_shoulder2                   | hinge | angular velocity (rad/s)   |
+    | 44  | angular velocity of the angle between left upper arm and left_lower_arm                                         | -Inf | Inf | left_elbow                       | hinge | angular velocity (rad/s)   |
+    | excluded | x-coordinate of the torso (centre)                                                                         | -Inf | Inf | root                             | free  | position (m)               |
+    | excluded | y-coordinate of the torso (centre)                                                                         | -Inf | Inf | root                             | free  | position (m)               |
+
+    Additionally, after all the positional and velocity based values in the table,
+    the observation contains (in order):
+    - *cinert:* Mass and inertia of a single rigid body relative to the center of mass
+    (this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*)
+    and hence adds to another 140 elements in the state space.
+    - *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence
+    adds another 84 elements in the state space
+    - *qfrc_actuator:* Constraint force generated as the actuator force. This has shape
+    `(23,)`  *(nv * 1)* and hence adds another 23 elements to the state space.
+    - *cfrc_ext:* This is the center of mass based external force on the body.  It has shape
+    14 * 6 (*nbody * 6*) and hence adds to another 84 elements in the state space.
+    where *nbody* stands for the number of bodies in the robot and *nv* stands for the
+    number of degrees of freedom (*= dim(qvel)*)
+
+    The body parts are:
+
+    | id (for `v2`,`v3`,`v4`) | body part |
+    | --- |  ------------  |
+    | 0   | worldBody (note: all values are constant 0) |
+    | 1   | torso |
+    | 2   | lwaist |
+    | 3   | pelvis |
+    | 4   | right_thigh |
+    | 5   | right_shin |
+    | 6   | right_foot |
+    | 7   | left_thigh |
+    | 8   | left_shin |
+    | 9   | left_foot |
+    | 10  | right_upper_arm |
+    | 11  | right_lower_arm |
+    | 12  | left_upper_arm |
+    | 13  | left_lower_arm |
+
+    The joints are:
+
+    | id (for `v2`,`v3`,`v4`) | joint |
+    | --- |  ------------  |
+    | 0   | root |
+    | 1   | root |
+    | 2   | root |
+    | 3   | root |
+    | 4   | root |
+    | 5   | root |
+    | 6   | abdomen_z |
+    | 7   | abdomen_y |
+    | 8   | abdomen_x |
+    | 9   | right_hip_x |
+    | 10  | right_hip_z |
+    | 11  | right_hip_y |
+    | 12  | right_knee |
+    | 13  | left_hip_x |
+    | 14  | left_hip_z |
+    | 15  | left_hip_y |
+    | 16  | left_knee |
+    | 17  | right_shoulder1 |
+    | 18  | right_shoulder2 |
+    | 19  | right_elbow |
+    | 20  | left_shoulder1 |
+    | 21  | left_shoulder2 |
+    | 22  | left_elbow |
+
+    The (x,y,z) coordinates are translational DOFs while the orientations are rotational
+    DOFs expressed as quaternions. One can read more about free joints on the
+    [Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
+
+    **Note:** The HumanoidStandup-v4 environment no longer has the following contact forces issue.
+    If using HumanoidStandup versions prior to v4, there have been reported issues that using a Mujoco-Py version > 2.0 results
+    in the contact forces always being 0. As such we recommend using a Mujoco-Py version < 2.0
+    when using those HumanoidStandup versions if you would like to report results with contact forces
+    (if contact forces are not used in your experiments, you can use version > 2.0).
+
+    ## Rewards
+    The reward consists of three parts:
+    - *uph_cost*: A reward for moving upward (in an attempt to stand up). This is not a relative
+    reward which measures how much upward it has moved from the last timestep, but it is an
+    absolute reward which measures how much upward the Humanoid has moved overall. It is
+    measured as *(z coordinate after action - 0)/(atomic timestep)*, where *z coordinate after
+    action* is index 2 of `qpos`/index 0 in the table, and *atomic timestep* is the time for
+    one frame of movement even though the simulation has a frame skip of 5 (done in order to inflate
+    rewards a little for faster learning).
+    - *quad_ctrl_cost*: A negative reward for penalising the humanoid if it has too large of
+    a control force. If there are *nu* actuators/controls, then the control has shape  `nu x 1`.
+    It is measured as *0.1 **x** sum(control<sup>2</sup>)*.
+    - *quad_impact_cost*: A negative reward for penalising the humanoid if the external
+    contact force is too large. It is calculated as *min(0.5 * 0.000001 * sum(external
+    contact force<sup>2</sup>), 10)*.
+
+    The total reward returned is ***reward*** *=* *uph_cost + 1 - quad_ctrl_cost - quad_impact_cost*
+
+    ## Starting State
+    All observations start in state
+    (0.0, 0.0,  0.105, 1.0, 0.0  ... 0.0) with a uniform noise in the range of
+    [-0.01, 0.01] added to the positional and velocity values (values in the table)
+    for stochasticity. Note that the initial z coordinate is intentionally selected
+    to be low, thereby indicating a laying down humanoid. The initial orientation is
+    designed to make it face forward as well.
+
+    ## Episode End
+    The episode can only end by truncation:
+
+    1. Truncation: The episode duration reaches 1000 timesteps
+    2. Termination: Never; `step` always returns `terminated=False`, even if state space values are no longer finite
+
+    ## Arguments
+
+    No additional arguments are currently supported.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('HumanoidStandup-v4')
+    ```
+
+    There is no v3 for HumanoidStandup, unlike the robot environments where a v3 and
+    beyond take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('HumanoidStandup-v2')
+    ```
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+    * v0: Initial versions release (1.0.0)
+    """
+
+    # Render modes advertised by this environment and its nominal rendering rate.
+    # NOTE(review): 67 fps presumably equals round(1 / (frame_skip * model timestep)) — confirm against the XML.
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 67,
+    }
+
+    def __init__(self, **kwargs):
+        observation_space = Box(
+            low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
+        )
+        MujocoEnv.__init__(
+            self,
+            "humanoidstandup.xml",
+            5,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def _get_obs(self):
+        data = self.data
+        return np.concatenate(
+            [
+                data.qpos.flat[2:],
+                data.qvel.flat,
+                data.cinert.flat,
+                data.cvel.flat,
+                data.qfrc_actuator.flat,
+                data.cfrc_ext.flat,
+            ]
+        )
+
+    def step(self, a):
+        self.do_simulation(a, self.frame_skip)
+        pos_after = self.data.qpos[2]
+        data = self.data
+        uph_cost = (pos_after - 0) / self.model.opt.timestep
+
+        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
+        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
+        quad_impact_cost = min(quad_impact_cost, 10)
+        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
+
+        if self.render_mode == "human":
+            self.render()
+        return (
+            self._get_obs(),
+            reward,
+            False,
+            False,
+            dict(
+                reward_linup=uph_cost,
+                reward_quadctrl=-quad_ctrl_cost,
+                reward_impact=-quad_impact_cost,
+            ),
+        )
+
+    def reset_model(self):
+        c = 0.01
+        self.set_state(
+            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
+            self.init_qvel
+            + self.np_random.uniform(
+                low=-c,
+                high=c,
+                size=self.model.nv,
+            ),
+        )
+        return self._get_obs()
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/inverted_double_pendulum.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/inverted_double_pendulum.py
@ -0,0 +1,69 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 20,
+    }
+
+    def __init__(self, **kwargs):
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
+        MuJocoPyEnv.__init__(
+            self,
+            "inverted_double_pendulum.xml",
+            5,
+            observation_space=observation_space,
+            **kwargs,
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def step(self, action):
+        self.do_simulation(action, self.frame_skip)
+
+        ob = self._get_obs()
+        x, _, y = self.sim.data.site_xpos[0]
+        dist_penalty = 0.01 * x**2 + (y - 2) ** 2
+        v1, v2 = self.sim.data.qvel[1:3]
+        vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
+        alive_bonus = 10
+        r = alive_bonus - dist_penalty - vel_penalty
+        terminated = bool(y <= 1)
+
+        if self.render_mode == "human":
+            self.render()
+        return ob, r, terminated, False, {}
+
+    def _get_obs(self):
+        return np.concatenate(
+            [
+                self.sim.data.qpos[:1],  # cart x pos
+                np.sin(self.sim.data.qpos[1:]),  # link angles
+                np.cos(self.sim.data.qpos[1:]),
+                np.clip(self.sim.data.qvel, -10, 10),
+                np.clip(self.sim.data.qfrc_constraint, -10, 10),
+            ]
+        ).ravel()
+
+    def reset_model(self):
+        self.set_state(
+            self.init_qpos
+            + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
+            self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1,
+        )
+        return self._get_obs()
+
+    def viewer_setup(self):
+        assert self.viewer is not None
+        v = self.viewer
+        v.cam.trackbodyid = 0
+        v.cam.distance = self.model.stat.extent * 0.5
+        v.cam.lookat[2] = 0.12250000000000005  # v.model.stat.center[2]
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py
@ -0,0 +1,179 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+# Camera settings passed to ``MujocoEnv.__init__`` as ``default_camera_config``
+# by the environment defined in this file.
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": 0,
+    "distance": 4.1225,
+    "lookat": np.array((0.0, 0.0, 0.12250000000000005)),
+}
+
+
+class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+
+    This environment originates from control theory and builds on the cartpole
+    environment based on the work done by Barto, Sutton, and Anderson in
+    ["Neuronlike adaptive elements that can solve difficult learning control problems"](https://ieeexplore.ieee.org/document/6313077),
+    powered by the Mujoco physics simulator - allowing for more complex experiments
+    (such as varying the effects of gravity or constraints). This environment involves a cart that can
+    moved linearly, with a pole fixed on it and a second pole fixed on the other end of the first one
+    (leaving the second pole as the only one with one free end). The cart can be pushed left or right,
+    and the goal is to balance the second pole on top of the first pole, which is in turn on top of the
+    cart, by applying continuous forces on the cart.
+
+    ## Action Space
+    The agent take a 1-element vector for actions.
+    The action space is a continuous `(action)` in `[-1, 1]`, where `action` represents the
+    numerical force applied to the cart (with magnitude representing the amount of force and
+    sign representing the direction)
+
+    | Num | Action                    | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit      |
+    |-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------|
+    | 0   | Force applied on the cart | -1          | 1           | slider                           | slide | Force (N) |
+
+    ## Observation Space
+
+    The state space consists of positional values of different body parts of the pendulum system,
+    followed by the velocities of those individual parts (their derivatives) with all the
+    positions ordered before all the velocities.
+
+    The observation is a `ndarray` with shape `(11,)` where the elements correspond to the following:
+
+    | Num | Observation                                                       | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
+    | --- | ----------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
+    | 0   | position of the cart along the linear surface                     | -Inf | Inf | slider                           | slide | position (m)             |
+    | 1   | sine of the angle between the cart and the first pole             | -Inf | Inf | sin(hinge)                       | hinge | unitless                 |
+    | 2   | sine of the angle between the two poles                           | -Inf | Inf | sin(hinge2)                      | hinge | unitless                 |
+    | 3   | cosine of the angle between the cart and the first pole           | -Inf | Inf | cos(hinge)                       | hinge | unitless                 |
+    | 4   | cosine of the angle between the two poles                         | -Inf | Inf | cos(hinge2)                      | hinge | unitless                 |
+    | 5   | velocity of the cart                                              | -Inf | Inf | slider                           | slide | velocity (m/s)           |
+    | 6   | angular velocity of the angle between the cart and the first pole | -Inf | Inf | hinge                            | hinge | angular velocity (rad/s) |
+    | 7   | angular velocity of the angle between the two poles               | -Inf | Inf | hinge2                           | hinge | angular velocity (rad/s) |
+    | 8   | constraint force - 1                                              | -Inf | Inf |                                  |       | Force (N)                |
+    | 9   | constraint force - 2                                              | -Inf | Inf |                                  |       | Force (N)                |
+    | 10  | constraint force - 3                                              | -Inf | Inf |                                  |       | Force (N)                |
+
+
+    There is physical contact between the robots and their environment - and Mujoco
+    attempts at getting realistic physics simulations for the possible physical contact
+    dynamics by aiming for physical accuracy and computational efficiency.
+
+    There is one constraint force for contacts for each degree of freedom (3).
+    The approach and handling of constraints by Mujoco is unique to the simulator
+    and is based on their research. Once can find more information in their
+    [*documentation*](https://mujoco.readthedocs.io/en/latest/computation.html)
+    or in their paper
+    ["Analytically-invertible dynamics with contacts and constraints: Theory and implementation in MuJoCo"](https://homes.cs.washington.edu/~todorov/papers/TodorovICRA14.pdf).
+
+
+    ## Rewards
+
+    The reward consists of two parts:
+    - *alive_bonus*: The goal is to make the second inverted pendulum stand upright
+    (within a certain angle limit) as long as possible - as such a reward of +10 is awarded
+     for each timestep that the second pole is upright.
+    - *distance_penalty*: This reward is a measure of how far the *tip* of the second pendulum
+    (the only free end) moves, and it is calculated as
+    *0.01 * x<sup>2</sup> + (y - 2)<sup>2</sup>*, where *x* is the x-coordinate of the tip
+    and *y* is the y-coordinate of the tip of the second pole.
+    - *velocity_penalty*: A negative reward for penalising the agent if it moves too
+    fast *0.001 *  v<sub>1</sub><sup>2</sup> + 0.005 * v<sub>2</sub> <sup>2</sup>*
+
+    The total reward returned is ***reward*** *=* *alive_bonus - distance_penalty - velocity_penalty*
+
+    ## Starting State
+    All observations start in state
+    (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise in the range
+    of [-0.1, 0.1] added to the positional values (cart position and pole angles) and standard
+    normal force with a standard deviation of 0.1 added to the velocity values for stochasticity.
+
+    ## Episode End
+    The episode ends when any of the following happens:
+
+    1.Truncation:  The episode duration reaches 1000 timesteps.
+    2.Termination: Any of the state space values is no longer finite.
+    3.Termination: The y_coordinate of the tip of the second pole *is less than or equal* to 1. The maximum standing height of the system is 1.196 m when all the parts are perpendicularly vertical on top of each other).
+
+    ## Arguments
+
+    No additional arguments are currently supported.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('InvertedDoublePendulum-v4')
+    ```
+    There is no v3 for InvertedPendulum, unlike the robot environments where a v3 and
+    beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('InvertedDoublePendulum-v2')
+    ```
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum)
+    * v0: Initial versions release (1.0.0)
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 20,
+    }
+
+    def __init__(self, **kwargs):
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
+        MujocoEnv.__init__(
+            self,
+            "inverted_double_pendulum.xml",
+            5,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def step(self, action):
+        self.do_simulation(action, self.frame_skip)
+        ob = self._get_obs()
+        x, _, y = self.data.site_xpos[0]
+        dist_penalty = 0.01 * x**2 + (y - 2) ** 2
+        v1, v2 = self.data.qvel[1:3]
+        vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
+        alive_bonus = 10
+        r = alive_bonus - dist_penalty - vel_penalty
+        terminated = bool(y <= 1)
+        if self.render_mode == "human":
+            self.render()
+        return ob, r, terminated, False, {}
+
+    def _get_obs(self):
+        return np.concatenate(
+            [
+                self.data.qpos[:1],  # cart x pos
+                np.sin(self.data.qpos[1:]),  # link angles
+                np.cos(self.data.qpos[1:]),
+                np.clip(self.data.qvel, -10, 10),
+                np.clip(self.data.qfrc_constraint, -10, 10),
+            ]
+        ).ravel()
+
+    def reset_model(self):
+        self.set_state(
+            self.init_qpos
+            + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
+            self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1,
+        )
+        return self._get_obs()
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/inverted_pendulum.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/inverted_pendulum.py
@ -0,0 +1,56 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 25,
+    }
+
+    def __init__(self, **kwargs):
+        utils.EzPickle.__init__(self, **kwargs)
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
+        MuJocoPyEnv.__init__(
+            self,
+            "inverted_pendulum.xml",
+            2,
+            observation_space=observation_space,
+            **kwargs,
+        )
+
+    def step(self, a):
+        reward = 1.0
+        self.do_simulation(a, self.frame_skip)
+
+        ob = self._get_obs()
+        terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
+
+        if self.render_mode == "human":
+            self.render()
+        return ob, reward, terminated, False, {}
+
+    def reset_model(self):
+        qpos = self.init_qpos + self.np_random.uniform(
+            size=self.model.nq, low=-0.01, high=0.01
+        )
+        qvel = self.init_qvel + self.np_random.uniform(
+            size=self.model.nv, low=-0.01, high=0.01
+        )
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
+
+    def viewer_setup(self):
+        assert self.viewer is not None
+        self.viewer.cam.trackbodyid = 0
+        self.viewer.cam.distance = self.model.stat.extent
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/inverted_pendulum_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/inverted_pendulum_v4.py
@ -0,0 +1,137 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+# Camera settings passed to ``MujocoEnv.__init__`` as ``default_camera_config``
+# by the environment defined in this file.
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": 0,
+    "distance": 2.04,
+}
+
+
+class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+
+    This environment is the cartpole environment based on the work done by
+    Barto, Sutton, and Anderson in ["Neuronlike adaptive elements that can
+    solve difficult learning control problems"](https://ieeexplore.ieee.org/document/6313077),
+    just like in the classic environments but now powered by the Mujoco physics simulator -
+    allowing for more complex experiments (such as varying the effects of gravity).
+    This environment involves a cart that can moved linearly, with a pole fixed on it
+    at one end and having another end free. The cart can be pushed left or right, and the
+    goal is to balance the pole on the top of the cart by applying forces on the cart.
+
+    ## Action Space
+    The agent take a 1-element vector for actions.
+
+    The action space is a continuous `(action)` in `[-3, 3]`, where `action` represents
+    the numerical force applied to the cart (with magnitude representing the amount of
+    force and sign representing the direction)
+
+    | Num | Action                    | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit      |
+    |-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------|
+    | 0   | Force applied on the cart | -3          | 3           | slider                           | slide | Force (N) |
+
+    ## Observation Space
+
+    The state space consists of positional values of different body parts of
+    the pendulum system, followed by the velocities of those individual parts (their derivatives)
+    with all the positions ordered before all the velocities.
+
+    The observation is a `ndarray` with shape `(4,)` where the elements correspond to the following:
+
+    | Num | Observation                                   | Min  | Max | Name (in corresponding XML file) | Joint | Unit                      |
+    | --- | --------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------- |
+    | 0   | position of the cart along the linear surface | -Inf | Inf | slider                           | slide | position (m)              |
+    | 1   | vertical angle of the pole on the cart        | -Inf | Inf | hinge                            | hinge | angle (rad)               |
+    | 2   | linear velocity of the cart                   | -Inf | Inf | slider                           | slide | velocity (m/s)            |
+    | 3   | angular velocity of the pole on the cart      | -Inf | Inf | hinge                            | hinge | anglular velocity (rad/s) |
+
+
+    ## Rewards
+
+    The goal is to make the inverted pendulum stand upright (within a certain angle limit)
+    as long as possible - as such a reward of +1 is awarded for each timestep that
+    the pole is upright.
+
+    ## Starting State
+    All observations start in state
+    (0.0, 0.0, 0.0, 0.0) with a uniform noise in the range
+    of [-0.01, 0.01] added to the values for stochasticity.
+
+    ## Episode End
+    The episode ends when any of the following happens:
+
+    1. Truncation: The episode duration reaches 1000 timesteps.
+    2. Termination: Any of the state space values is no longer finite.
+    3. Termination: The absolute value of the vertical angle between the pole and the cart is greater than 0.2 radian.
+
+    ## Arguments
+
+    No additional arguments are currently supported.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('InvertedPendulum-v4')
+    ```
+    There is no v3 for InvertedPendulum, unlike the robot environments where a
+    v3 and beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+    ```python
+    import gymnasium as gym
+    env = gym.make('InvertedPendulum-v2')
+    ```
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum)
+    * v0: Initial versions release (1.0.0)
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 25,
+    }
+
+    def __init__(self, **kwargs):
+        utils.EzPickle.__init__(self, **kwargs)
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
+        MujocoEnv.__init__(
+            self,
+            "inverted_pendulum.xml",
+            2,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+
+    def step(self, a):
+        reward = 1.0
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+        terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
+        if self.render_mode == "human":
+            self.render()
+        return ob, reward, terminated, False, {}
+
+    def reset_model(self):
+        qpos = self.init_qpos + self.np_random.uniform(
+            size=self.model.nq, low=-0.01, high=0.01
+        )
+        qvel = self.init_qvel + self.np_random.uniform(
+            size=self.model.nv, low=-0.01, high=0.01
+        )
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate([self.data.qpos, self.data.qvel]).ravel()
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/mujoco_env.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/mujoco_env.py
@ -0,0 +1,418 @@
+from os import path
+from typing import Any, Dict, Optional, Tuple, Union
+
+import numpy as np
+from numpy.typing import NDArray
+
+import gymnasium as gym
+from gymnasium import error, logger, spaces
+from gymnasium.spaces import Space
+
+
+# Both binding packages are imported optionally: a failed import is stored in
+# a module-level variable instead of being raised here, and the environment
+# constructors in this module check that variable before using the bindings.
+try:
+    import mujoco_py
+except ImportError as e:
+    MUJOCO_PY_IMPORT_ERROR = e
+else:
+    MUJOCO_PY_IMPORT_ERROR = None
+
+try:
+    import mujoco
+except ImportError as e:
+    MUJOCO_IMPORT_ERROR = e
+else:
+    MUJOCO_IMPORT_ERROR = None
+
+
+# Default value of the `width` and `height` constructor parameters.
+DEFAULT_SIZE = 480
+
+
+class BaseMujocoEnv(gym.Env[np.float64, np.float32]):
+    """Superclass for all MuJoCo environments."""
+
+    def __init__(
+        self,
+        model_path,
+        frame_skip,
+        observation_space: Space,
+        render_mode: Optional[str] = None,
+        width: int = DEFAULT_SIZE,
+        height: int = DEFAULT_SIZE,
+        camera_id: Optional[int] = None,
+        camera_name: Optional[str] = None,
+    ):
+        """Base abstract class for mujoco based environments.
+
+        Args:
+            model_path: Path to the MuJoCo Model.
+            frame_skip: Number of MuJoCo simulation steps per gym `step()`.
+            observation_space: The observation space of the environment.
+            render_mode: The `render_mode` used.
+            width: The width of the render window.
+            height: The height of the render window.
+            camera_id: The camera ID used.
+            camera_name: The name of the camera used (can not be used in conjunction with `camera_id`).
+
+        Raises:
+            OSError: when the `model_path` does not exist.
+            error.DependencyNotInstalled: When `mujoco` is not installed.
+        """
+        if model_path.startswith(".") or model_path.startswith("/"):
+            self.fullpath = model_path
+        elif model_path.startswith("~"):
+            self.fullpath = path.expanduser(model_path)
+        else:
+            self.fullpath = path.join(path.dirname(__file__), "assets", model_path)
+        if not path.exists(self.fullpath):
+            raise OSError(f"File {self.fullpath} does not exist")
+
+        self.width = width
+        self.height = height
+        # may use width and height
+        self.model, self.data = self._initialize_simulation()
+
+        self.init_qpos = self.data.qpos.ravel().copy()
+        self.init_qvel = self.data.qvel.ravel().copy()
+
+        self.frame_skip = frame_skip
+
+        assert self.metadata["render_modes"] == [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ], self.metadata["render_modes"]
+        if "render_fps" in self.metadata:
+            assert (
+                int(np.round(1.0 / self.dt)) == self.metadata["render_fps"]
+            ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}'
+
+        self.observation_space = observation_space
+        self._set_action_space()
+
+        self.render_mode = render_mode
+        self.camera_name = camera_name
+        self.camera_id = camera_id
+
+    def _set_action_space(self):
+        bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
+        low, high = bounds.T
+        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
+        return self.action_space
+
+    # methods to override:
+    # ----------------------------
+
+    def reset_model(self) -> NDArray[np.float64]:
+        """
+        Reset the robot degrees of freedom (qpos and qvel).
+        Implement this in each subclass.
+        """
+        raise NotImplementedError
+
+    def _initialize_simulation(self) -> Tuple[Any, Any]:
+        """
+        Initialize MuJoCo simulation data structures mjModel and mjData.
+        """
+        raise NotImplementedError
+
+    def _reset_simulation(self) -> None:
+        """
+        Reset MuJoCo simulation data structures, mjModel and mjData.
+        """
+        raise NotImplementedError
+
+    def _step_mujoco_simulation(self, ctrl, n_frames) -> None:
+        """
+        Step over the MuJoCo simulation.
+        """
+        raise NotImplementedError
+
+    def render(self) -> Union[NDArray[np.float64], None]:
+        """
+        Render a frame from the MuJoCo simulation as specified by the render_mode.
+        """
+        raise NotImplementedError
+
+    # -----------------------------
+    def _get_reset_info(self) -> Dict[str, float]:
+        """Function that generates the `info` that is returned during a `reset()`."""
+        return {}
+
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ):
+        super().reset(seed=seed)
+
+        self._reset_simulation()
+
+        ob = self.reset_model()
+        info = self._get_reset_info()
+
+        if self.render_mode == "human":
+            self.render()
+        return ob, info
+
+    def set_state(self, qpos, qvel) -> None:
+        """
+        Set the joints position qpos and velocity qvel of the model. Override this method depending on the MuJoCo bindings used.
+        """
+        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
+
+    @property
+    def dt(self) -> float:
+        return self.model.opt.timestep * self.frame_skip
+
+    def do_simulation(self, ctrl, n_frames) -> None:
+        """
+        Step the simulation n number of frames and applying a control action.
+        """
+        # Check control input is contained in the action space
+        if np.array(ctrl).shape != (self.model.nu,):
+            raise ValueError(
+                f"Action dimension mismatch. Expected {(self.model.nu,)}, found {np.array(ctrl).shape}"
+            )
+        self._step_mujoco_simulation(ctrl, n_frames)
+
+    def close(self):
+        """Close all processes like rendering contexts"""
+        raise NotImplementedError
+
+    def get_body_com(self, body_name) -> NDArray[np.float64]:
+        """Return the cartesian position of a body frame"""
+        raise NotImplementedError
+
+    def state_vector(self) -> NDArray[np.float64]:
+        """Return the position and velocity joint states of the model"""
+        return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])
+
+
+class MuJocoPyEnv(BaseMujocoEnv):
+    def __init__(
+        self,
+        model_path: str,
+        frame_skip: int,
+        observation_space: Space,
+        render_mode: Optional[str] = None,
+        width: int = DEFAULT_SIZE,
+        height: int = DEFAULT_SIZE,
+        camera_id: Optional[int] = None,
+        camera_name: Optional[str] = None,
+    ):
+        if MUJOCO_PY_IMPORT_ERROR is not None:
+            raise error.DependencyNotInstalled(
+                f"{MUJOCO_PY_IMPORT_ERROR}. "
+                "(HINT: you need to install mujoco-py, and also perform the setup instructions "
+                "here: https://github.com/openai/mujoco-py.)"
+            )
+
+        logger.deprecation(
+            "This version of the mujoco environments depends "
+            "on the mujoco-py bindings, which are no longer maintained "
+            "and may stop working. Please upgrade to the v4 versions of "
+            "the environments (which depend on the mujoco python bindings instead), unless "
+            "you are trying to precisely replicate previous works)."
+        )
+
+        self.viewer = None
+        self._viewers = {}
+
+        super().__init__(
+            model_path,
+            frame_skip,
+            observation_space,
+            render_mode,
+            width,
+            height,
+            camera_id,
+            camera_name,
+        )
+
+    def _initialize_simulation(self):
+        model = mujoco_py.load_model_from_path(self.fullpath)
+        self.sim = mujoco_py.MjSim(model)
+        data = self.sim.data
+        return model, data
+
+    def _reset_simulation(self):
+        self.sim.reset()
+
+    def set_state(self, qpos, qvel):
+        super().set_state(qpos, qvel)
+        state = self.sim.get_state()
+        state = mujoco_py.MjSimState(state.time, qpos, qvel, state.act, state.udd_state)
+        self.sim.set_state(state)
+        self.sim.forward()
+
+    def get_body_com(self, body_name):
+        return self.data.get_body_xpos(body_name)
+
+    def _step_mujoco_simulation(self, ctrl, n_frames):
+        self.sim.data.ctrl[:] = ctrl
+
+        for _ in range(n_frames):
+            self.sim.step()
+
+    def render(self):
+        if self.render_mode is None:
+            assert self.spec is not None
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
+        width, height = self.width, self.height
+        camera_name, camera_id = self.camera_name, self.camera_id
+        if self.render_mode in {"rgb_array", "depth_array"}:
+            if camera_id is not None and camera_name is not None:
+                raise ValueError(
+                    "Both `camera_id` and `camera_name` cannot be"
+                    " specified at the same time."
+                )
+
+            no_camera_specified = camera_name is None and camera_id is None
+            if no_camera_specified:
+                camera_name = "track"
+
+            if camera_id is None and camera_name in self.model._camera_name2id:
+                if camera_name in self.model._camera_name2id:
+                    camera_id = self.model.camera_name2id(camera_name)
+
+                self._get_viewer(self.render_mode).render(
+                    width, height, camera_id=camera_id
+                )
+
+        if self.render_mode == "rgb_array":
+            data = self._get_viewer(self.render_mode).read_pixels(
+                width, height, depth=False
+            )
+            # original image is upside-down, so flip it
+            return data[::-1, :, :]
+        elif self.render_mode == "depth_array":
+            self._get_viewer(self.render_mode).render(width, height)
+            # Extract depth part of the read_pixels() tuple
+            data = self._get_viewer(self.render_mode).read_pixels(
+                width, height, depth=True
+            )[1]
+            # original image is upside-down, so flip it
+            return data[::-1, :]
+        elif self.render_mode == "human":
+            self._get_viewer(self.render_mode).render()
+
+    def _get_viewer(
+        self, mode
+    ) -> Union["mujoco_py.MjViewer", "mujoco_py.MjRenderContextOffscreen"]:
+        self.viewer = self._viewers.get(mode)
+        if self.viewer is None:
+            if mode == "human":
+                self.viewer = mujoco_py.MjViewer(self.sim)
+
+            elif mode in {"rgb_array", "depth_array"}:
+                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
+            else:
+                raise AttributeError(
+                    f"Unknown mode: {mode}, expected modes: {self.metadata['render_modes']}"
+                )
+
+            self.viewer_setup()
+            self._viewers[mode] = self.viewer
+
+        return self.viewer
+
+    def close(self):
+        if self.viewer is not None:
+            self.viewer = None
+            self._viewers = {}
+
+    def viewer_setup(self):
+        """
+        This method is called when the viewer is initialized.
+        Optionally implement this method, if you need to tinker with camera position and so forth.
+        """
+        raise NotImplementedError
+
+
+class MujocoEnv(BaseMujocoEnv):
+    """Superclass for MuJoCo environments."""
+
+    def __init__(
+        self,
+        model_path,
+        frame_skip,
+        observation_space: Space,
+        render_mode: Optional[str] = None,
+        width: int = DEFAULT_SIZE,
+        height: int = DEFAULT_SIZE,
+        camera_id: Optional[int] = None,
+        camera_name: Optional[str] = None,
+        default_camera_config: Optional[dict] = None,
+    ):
+        if MUJOCO_IMPORT_ERROR is not None:
+            raise error.DependencyNotInstalled(
+                f"{MUJOCO_IMPORT_ERROR}. "
+                "(HINT: you need to install mujoco, run `pip install gymnasium[mujoco]`.)"
+            )
+
+        super().__init__(
+            model_path,
+            frame_skip,
+            observation_space,
+            render_mode,
+            width,
+            height,
+            camera_id,
+            camera_name,
+        )
+
+        from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer
+
+        self.mujoco_renderer = MujocoRenderer(
+            self.model, self.data, default_camera_config
+        )
+
+    def _initialize_simulation(
+        self,
+    ):
+        model = mujoco.MjModel.from_xml_path(self.fullpath)
+        # MjrContext will copy model.vis.global_.off* to con.off*
+        model.vis.global_.offwidth = self.width
+        model.vis.global_.offheight = self.height
+        data = mujoco.MjData(model)
+        return model, data
+
+    def _reset_simulation(self):
+        mujoco.mj_resetData(self.model, self.data)
+
+    def set_state(self, qpos, qvel):
+        super().set_state(qpos, qvel)
+        self.data.qpos[:] = np.copy(qpos)
+        self.data.qvel[:] = np.copy(qvel)
+        if self.model.na == 0:
+            self.data.act[:] = None
+        mujoco.mj_forward(self.model, self.data)
+
+    def _step_mujoco_simulation(self, ctrl, n_frames):
+        self.data.ctrl[:] = ctrl
+
+        mujoco.mj_step(self.model, self.data, nstep=n_frames)
+
+        # As of MuJoCo 2.0, force-related quantities like cacc are not computed
+        # unless there's a force sensor in the model.
+        # See https://github.com/openai/gym/issues/1541
+        mujoco.mj_rnePostConstraint(self.model, self.data)
+
+    def render(self):
+        return self.mujoco_renderer.render(
+            self.render_mode, self.camera_id, self.camera_name
+        )
+
+    def close(self):
+        if self.mujoco_renderer is not None:
+            self.mujoco_renderer.close()
+
+    def get_body_com(self, body_name):
+        return self.data.body(body_name).xpos
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/mujoco_rendering.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/mujoco_rendering.py
@ -0,0 +1,714 @@
+import collections
+import os
+import time
+from typing import Optional
+
+import glfw
+import imageio
+import mujoco
+import numpy as np
+
+
+def _import_egl(width, height):
+    from mujoco.egl import GLContext
+
+    return GLContext(width, height)
+
+
+def _import_glfw(width, height):
+    from mujoco.glfw import GLContext
+
+    return GLContext(width, height)
+
+
+def _import_osmesa(width, height):
+    from mujoco.osmesa import GLContext
+
+    return GLContext(width, height)
+
+
+_ALL_RENDERERS = collections.OrderedDict(
+    [
+        ("glfw", _import_glfw),
+        ("egl", _import_egl),
+        ("osmesa", _import_osmesa),
+    ]
+)
+
+
+class BaseRender:
+    def __init__(
+        self, model: "mujoco.MjModel", data: "mujoco.MjData", width: int, height: int
+    ):
+        """Render context superclass for offscreen and window rendering."""
+        self.model = model
+        self.data = data
+
+        self._markers = []
+        self._overlays = {}
+
+        self.viewport = mujoco.MjrRect(0, 0, width, height)
+
+        # This goes to specific visualizer
+        self.scn = mujoco.MjvScene(self.model, 1000)
+        self.cam = mujoco.MjvCamera()
+        self.vopt = mujoco.MjvOption()
+        self.pert = mujoco.MjvPerturb()
+
+        self.make_context_current()
+
+        # Keep in Mujoco Context
+        self.con = mujoco.MjrContext(self.model, mujoco.mjtFontScale.mjFONTSCALE_150)
+
+        self._set_mujoco_buffer()
+
+    def _set_mujoco_buffer(self):
+        raise NotImplementedError
+
+    def make_context_current(self):
+        raise NotImplementedError
+
+    def add_overlay(self, gridpos: int, text1: str, text2: str):
+        """Overlays text on the scene."""
+        if gridpos not in self._overlays:
+            self._overlays[gridpos] = ["", ""]
+        self._overlays[gridpos][0] += text1 + "\n"
+        self._overlays[gridpos][1] += text2 + "\n"
+
+    def add_marker(self, **marker_params):
+        self._markers.append(marker_params)
+
+    def _add_marker_to_scene(self, marker: dict):
+        if self.scn.ngeom >= self.scn.maxgeom:
+            raise RuntimeError("Ran out of geoms. maxgeom: %d" % self.scn.maxgeom)
+
+        g = self.scn.geoms[self.scn.ngeom]
+        # default values.
+        g.dataid = -1
+        g.objtype = mujoco.mjtObj.mjOBJ_UNKNOWN
+        g.objid = -1
+        g.category = mujoco.mjtCatBit.mjCAT_DECOR
+        g.texid = -1
+        g.texuniform = 0
+        g.texrepeat[0] = 1
+        g.texrepeat[1] = 1
+        g.emission = 0
+        g.specular = 0.5
+        g.shininess = 0.5
+        g.reflectance = 0
+        g.type = mujoco.mjtGeom.mjGEOM_BOX
+        g.size[:] = np.ones(3) * 0.1
+        g.mat[:] = np.eye(3)
+        g.rgba[:] = np.ones(4)
+
+        for key, value in marker.items():
+            if isinstance(value, (int, float, mujoco._enums.mjtGeom)):
+                setattr(g, key, value)
+            elif isinstance(value, (tuple, list, np.ndarray)):
+                attr = getattr(g, key)
+                attr[:] = np.asarray(value).reshape(attr.shape)
+            elif isinstance(value, str):
+                assert key == "label", "Only label is a string in mjtGeom."
+                if value is None:
+                    g.label[0] = 0
+                else:
+                    g.label = value
+            elif hasattr(g, key):
+                raise ValueError(
+                    "mjtGeom has attr {} but type {} is invalid".format(
+                        key, type(value)
+                    )
+                )
+            else:
+                raise ValueError("mjtGeom doesn't have field %s" % key)
+
+        self.scn.ngeom += 1
+
+    def close(self):
+        """Override close in your rendering subclass to perform any necessary cleanup
+        after env.close() is called.
+        """
+        raise NotImplementedError
+
+
+class OffScreenViewer(BaseRender):
+    """Offscreen rendering class with opengl context."""
+
+    def __init__(self, model: "mujoco.MjMujoco", data: "mujoco.MjData"):
+        width = model.vis.global_.offwidth
+        height = model.vis.global_.offheight
+
+        # We must make GLContext before MjrContext
+        self._get_opengl_backend(width, height)
+
+        super().__init__(model, data, width, height)
+
+        self._init_camera()
+
+    def _init_camera(self):
+        self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
+        self.cam.fixedcamid = -1
+        for i in range(3):
+            self.cam.lookat[i] = np.median(self.data.geom_xpos[:, i])
+        self.cam.distance = self.model.stat.extent
+
+    def _get_opengl_backend(self, width: int, height: int):
+        self.backend = os.environ.get("MUJOCO_GL")
+        if self.backend is not None:
+            try:
+                self.opengl_context = _ALL_RENDERERS[self.backend](width, height)
+            except KeyError as e:
+                raise RuntimeError(
+                    "Environment variable {} must be one of {!r}: got {!r}.".format(
+                        "MUJOCO_GL", _ALL_RENDERERS.keys(), self.backend
+                    )
+                ) from e
+
+        else:
+            for name, _ in _ALL_RENDERERS.items():
+                try:
+                    self.opengl_context = _ALL_RENDERERS[name](width, height)
+                    self.backend = name
+                    break
+                except:  # noqa:E722
+                    pass
+            if self.backend is None:
+                raise RuntimeError(
+                    "No OpenGL backend could be imported. Attempting to create a "
+                    "rendering context will result in a RuntimeError."
+                )
+
+    def _set_mujoco_buffer(self):
+        mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_OFFSCREEN, self.con)
+
+    def make_context_current(self):
+        self.opengl_context.make_current()
+
+    def free(self):
+        self.opengl_context.free()
+
+    def __del__(self):
+        self.free()
+
+    def render(
+        self,
+        render_mode: str,
+        camera_id: Optional[int] = None,
+        segmentation: bool = False,
+    ):
+        if camera_id is not None:
+            if camera_id == -1:
+                self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
+            else:
+                self.cam.type = mujoco.mjtCamera.mjCAMERA_FIXED
+            self.cam.fixedcamid = camera_id
+
+        mujoco.mjv_updateScene(
+            self.model,
+            self.data,
+            self.vopt,
+            self.pert,
+            self.cam,
+            mujoco.mjtCatBit.mjCAT_ALL,
+            self.scn,
+        )
+
+        if segmentation:
+            self.scn.flags[mujoco.mjtRndFlag.mjRND_SEGMENT] = 1
+            self.scn.flags[mujoco.mjtRndFlag.mjRND_IDCOLOR] = 1
+
+        for marker_params in self._markers:
+            self._add_marker_to_scene(marker_params)
+
+        mujoco.mjr_render(self.viewport, self.scn, self.con)
+
+        for gridpos, (text1, text2) in self._overlays.items():
+            mujoco.mjr_overlay(
+                mujoco.mjtFontScale.mjFONTSCALE_150,
+                gridpos,
+                self.viewport,
+                text1.encode(),
+                text2.encode(),
+                self.con,
+            )
+
+        if segmentation:
+            self.scn.flags[mujoco.mjtRndFlag.mjRND_SEGMENT] = 0
+            self.scn.flags[mujoco.mjtRndFlag.mjRND_IDCOLOR] = 0
+
+        rgb_arr = np.zeros(
+            3 * self.viewport.width * self.viewport.height, dtype=np.uint8
+        )
+        depth_arr = np.zeros(
+            self.viewport.width * self.viewport.height, dtype=np.float32
+        )
+
+        mujoco.mjr_readPixels(rgb_arr, depth_arr, self.viewport, self.con)
+
+        if render_mode == "depth_array":
+            depth_img = depth_arr.reshape(self.viewport.height, self.viewport.width)
+            # original image is upside-down, so flip it
+            return depth_img[::-1, :]
+        else:
+            rgb_img = rgb_arr.reshape(self.viewport.height, self.viewport.width, 3)
+
+            if segmentation:
+                seg_img = (
+                    rgb_img[:, :, 0]
+                    + rgb_img[:, :, 1] * (2**8)
+                    + rgb_img[:, :, 2] * (2**16)
+                )
+                seg_img[seg_img >= (self.scn.ngeom + 1)] = 0
+                seg_ids = np.full(
+                    (self.scn.ngeom + 1, 2), fill_value=-1, dtype=np.int32
+                )
+
+                for i in range(self.scn.ngeom):
+                    geom = self.scn.geoms[i]
+                    if geom.segid != -1:
+                        seg_ids[geom.segid + 1, 0] = geom.objtype
+                        seg_ids[geom.segid + 1, 1] = geom.objid
+                rgb_img = seg_ids[seg_img]
+
+            # original image is upside-down, so flip i
+            return rgb_img[::-1, :, :]
+
+    def close(self):
+        self.free()
+        glfw.terminate()
+
+
+class WindowViewer(BaseRender):
+    """Class for window rendering in all MuJoCo environments."""
+
+    def __init__(self, model: "mujoco.MjModel", data: "mujoco.MjData"):
+        glfw.init()
+
+        self._button_left_pressed = False
+        self._button_right_pressed = False
+        self._last_mouse_x = 0
+        self._last_mouse_y = 0
+        self._paused = False
+        self._transparent = False
+        self._contacts = False
+        self._render_every_frame = True
+        self._image_idx = 0
+        self._image_path = "/tmp/frame_%07d.png"
+        self._time_per_render = 1 / 60.0
+        self._run_speed = 1.0
+        self._loop_count = 0
+        self._advance_by_one_step = False
+        self._hide_menu = False
+
+        width, height = glfw.get_video_mode(glfw.get_primary_monitor()).size
+        glfw.window_hint(glfw.VISIBLE, 1)
+        self.window = glfw.create_window(width // 2, height // 2, "mujoco", None, None)
+
+        self.width, self.height = glfw.get_framebuffer_size(self.window)
+        window_width, _ = glfw.get_window_size(self.window)
+        self._scale = self.width * 1.0 / window_width
+
+        # set callbacks
+        glfw.set_cursor_pos_callback(self.window, self._cursor_pos_callback)
+        glfw.set_mouse_button_callback(self.window, self._mouse_button_callback)
+        glfw.set_scroll_callback(self.window, self._scroll_callback)
+        glfw.set_key_callback(self.window, self._key_callback)
+
+        super().__init__(model, data, width, height)
+        glfw.swap_interval(1)
+
+    def _set_mujoco_buffer(self):
+        mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_WINDOW, self.con)
+
+    def make_context_current(self):
+        glfw.make_context_current(self.window)
+
+    def free(self):
+        if self.window:
+            if glfw.get_current_context() == self.window:
+                glfw.make_context_current(None)
+            glfw.destroy_window(self.window)
+            self.window = None
+
+    def __del__(self):
+        """Eliminate all of the OpenGL glfw contexts and windows"""
+        self.free()
+
+    def render(self):
+        """
+        Renders the environment geometries in the OpenGL glfw window:
+            1. Create the overlay for the left side panel menu.
+            2. Update the geometries used for rendering based on the current state of the model - `mujoco.mjv_updateScene()`.
+            3. Add markers to scene, these are additional geometries to include in the model, i.e arrows, https://mujoco.readthedocs.io/en/latest/APIreference.html?highlight=arrow#mjtgeom.
+                These markers are added with the `add_marker()` method before rendering.
+            4. Render the 3D scene to the window context - `mujoco.mjr_render()`.
+            5. Render overlays in the window context - `mujoco.mjr_overlay()`.
+            6. Swap front and back buffer, https://www.glfw.org/docs/3.3/quick.html.
+            7. Poll events like mouse clicks or keyboard input.
+        """
+
+        # mjv_updateScene, mjr_render, mjr_overlay
+        def update():
+            # fill overlay items
+            self._create_overlay()
+
+            render_start = time.time()
+            if self.window is None:
+                return
+            elif glfw.window_should_close(self.window):
+                glfw.destroy_window(self.window)
+                glfw.terminate()
+            self.viewport.width, self.viewport.height = glfw.get_framebuffer_size(
+                self.window
+            )
+            # update scene
+            mujoco.mjv_updateScene(
+                self.model,
+                self.data,
+                self.vopt,
+                mujoco.MjvPerturb(),
+                self.cam,
+                mujoco.mjtCatBit.mjCAT_ALL.value,
+                self.scn,
+            )
+
+            # marker items
+            for marker in self._markers:
+                self._add_marker_to_scene(marker)
+
+            # render
+            mujoco.mjr_render(self.viewport, self.scn, self.con)
+
+            # overlay items
+            if not self._hide_menu:
+                for gridpos, [t1, t2] in self._overlays.items():
+                    mujoco.mjr_overlay(
+                        mujoco.mjtFontScale.mjFONTSCALE_150,
+                        gridpos,
+                        self.viewport,
+                        t1,
+                        t2,
+                        self.con,
+                    )
+
+            glfw.swap_buffers(self.window)
+            glfw.poll_events()
+            self._time_per_render = 0.9 * self._time_per_render + 0.1 * (
+                time.time() - render_start
+            )
+
+        if self._paused:
+            while self._paused:
+                update()
+                if self._advance_by_one_step:
+                    self._advance_by_one_step = False
+                    break
+        else:
+            self._loop_count += self.model.opt.timestep / (
+                self._time_per_render * self._run_speed
+            )
+            if self._render_every_frame:
+                self._loop_count = 1
+            while self._loop_count > 0:
+                update()
+                self._loop_count -= 1
+
+        # clear overlay
+        self._overlays.clear()
+        # clear markers
+        self._markers.clear()
+
+    def close(self):
+        self.free()
+        glfw.terminate()
+
+    def _key_callback(self, window, key: int, scancode, action: int, mods):
+        if action != glfw.RELEASE:
+            return
+        # Switch cameras
+        elif key == glfw.KEY_TAB:
+            self.cam.fixedcamid += 1
+            self.cam.type = mujoco.mjtCamera.mjCAMERA_FIXED
+            if self.cam.fixedcamid >= self.model.ncam:
+                self.cam.fixedcamid = -1
+                self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
+        # Pause simulation
+        elif key == glfw.KEY_SPACE and self._paused is not None:
+            self._paused = not self._paused
+        # Advances simulation by one step.
+        elif key == glfw.KEY_RIGHT and self._paused is not None:
+            self._advance_by_one_step = True
+            self._paused = True
+        # Slows down simulation
+        elif key == glfw.KEY_S:
+            self._run_speed /= 2.0
+        # Speeds up simulation
+        elif key == glfw.KEY_F:
+            self._run_speed *= 2.0
+        # Turn off / turn on rendering every frame.
+        elif key == glfw.KEY_D:
+            self._render_every_frame = not self._render_every_frame
+        # Capture screenshot
+        elif key == glfw.KEY_T:
+            img = np.zeros(
+                (
+                    glfw.get_framebuffer_size(self.window)[1],
+                    glfw.get_framebuffer_size(self.window)[0],
+                    3,
+                ),
+                dtype=np.uint8,
+            )
+            mujoco.mjr_readPixels(img, None, self.viewport, self.con)
+            imageio.imwrite(self._image_path % self._image_idx, np.flipud(img))
+            self._image_idx += 1
+        # Display contact forces
+        elif key == glfw.KEY_C:
+            self._contacts = not self._contacts
+            self.vopt.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = self._contacts
+            self.vopt.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = self._contacts
+        # Display coordinate frames
+        elif key == glfw.KEY_E:
+            self.vopt.frame = 1 - self.vopt.frame
+        # Hide overlay menu
+        elif key == glfw.KEY_H:
+            self._hide_menu = not self._hide_menu
+        # Make transparent
+        elif key == glfw.KEY_R:
+            self._transparent = not self._transparent
+            if self._transparent:
+                self.model.geom_rgba[:, 3] /= 5.0
+            else:
+                self.model.geom_rgba[:, 3] *= 5.0
+        # Geom group visibility
+        elif key in (glfw.KEY_0, glfw.KEY_1, glfw.KEY_2, glfw.KEY_3, glfw.KEY_4):
+            self.vopt.geomgroup[key - glfw.KEY_0] ^= 1
+        # Quit
+        if key == glfw.KEY_ESCAPE:
+            print("Pressed ESC")
+            print("Quitting.")
+            glfw.destroy_window(self.window)
+            glfw.terminate()
+
+    def _cursor_pos_callback(
+        self, window: "glfw.LP__GLFWwindow", xpos: float, ypos: float
+    ):
+        if not (self._button_left_pressed or self._button_right_pressed):
+            return
+
+        mod_shift = (
+            glfw.get_key(window, glfw.KEY_LEFT_SHIFT) == glfw.PRESS
+            or glfw.get_key(window, glfw.KEY_RIGHT_SHIFT) == glfw.PRESS
+        )
+        if self._button_right_pressed:
+            action = (
+                mujoco.mjtMouse.mjMOUSE_MOVE_H
+                if mod_shift
+                else mujoco.mjtMouse.mjMOUSE_MOVE_V
+            )
+        elif self._button_left_pressed:
+            action = (
+                mujoco.mjtMouse.mjMOUSE_ROTATE_H
+                if mod_shift
+                else mujoco.mjtMouse.mjMOUSE_ROTATE_V
+            )
+        else:
+            action = mujoco.mjtMouse.mjMOUSE_ZOOM
+
+        dx = int(self._scale * xpos) - self._last_mouse_x
+        dy = int(self._scale * ypos) - self._last_mouse_y
+        width, height = glfw.get_framebuffer_size(window)
+
+        mujoco.mjv_moveCamera(
+            self.model, action, dx / height, dy / height, self.scn, self.cam
+        )
+
+        self._last_mouse_x = int(self._scale * xpos)
+        self._last_mouse_y = int(self._scale * ypos)
+
+    def _mouse_button_callback(self, window: "glfw.LP__GLFWwindow", button, act, mods):
+        self._button_left_pressed = (
+            glfw.get_mouse_button(window, glfw.MOUSE_BUTTON_LEFT) == glfw.PRESS
+        )
+        self._button_right_pressed = (
+            glfw.get_mouse_button(window, glfw.MOUSE_BUTTON_RIGHT) == glfw.PRESS
+        )
+
+        x, y = glfw.get_cursor_pos(window)
+        self._last_mouse_x = int(self._scale * x)
+        self._last_mouse_y = int(self._scale * y)
+
+    def _scroll_callback(self, window, x_offset, y_offset: float):
+        mujoco.mjv_moveCamera(
+            self.model,
+            mujoco.mjtMouse.mjMOUSE_ZOOM,
+            0,
+            -0.05 * y_offset,
+            self.scn,
+            self.cam,
+        )
+
+    def _create_overlay(self):
+        topleft = mujoco.mjtGridPos.mjGRID_TOPLEFT
+        bottomleft = mujoco.mjtGridPos.mjGRID_BOTTOMLEFT
+
+        if self._render_every_frame:
+            self.add_overlay(topleft, "", "")
+        else:
+            self.add_overlay(
+                topleft,
+                "Run speed = %.3f x real time" % self._run_speed,
+                "[S]lower, [F]aster",
+            )
+        self.add_overlay(
+            topleft, "Ren[d]er every frame", "On" if self._render_every_frame else "Off"
+        )
+        self.add_overlay(
+            topleft,
+            "Switch camera (#cams = %d)" % (self.model.ncam + 1),
+            "[Tab] (camera ID = %d)" % self.cam.fixedcamid,
+        )
+        self.add_overlay(topleft, "[C]ontact forces", "On" if self._contacts else "Off")
+        self.add_overlay(topleft, "T[r]ansparent", "On" if self._transparent else "Off")
+        if self._paused is not None:
+            if not self._paused:
+                self.add_overlay(topleft, "Stop", "[Space]")
+            else:
+                self.add_overlay(topleft, "Start", "[Space]")
+                self.add_overlay(
+                    topleft, "Advance simulation by one step", "[right arrow]"
+                )
+        self.add_overlay(
+            topleft, "Referenc[e] frames", "On" if self.vopt.frame == 1 else "Off"
+        )
+        self.add_overlay(topleft, "[H]ide Menu", "")
+        if self._image_idx > 0:
+            fname = self._image_path % (self._image_idx - 1)
+            self.add_overlay(topleft, "Cap[t]ure frame", "Saved as %s" % fname)
+        else:
+            self.add_overlay(topleft, "Cap[t]ure frame", "")
+        self.add_overlay(topleft, "Toggle geomgroup visibility", "0-4")
+
+        self.add_overlay(bottomleft, "FPS", "%d%s" % (1 / self._time_per_render, ""))
+        self.add_overlay(
+            bottomleft, "Solver iterations", str(self.data.solver_iter + 1)
+        )
+        self.add_overlay(
+            bottomleft, "Step", str(round(self.data.time / self.model.opt.timestep))
+        )
+        self.add_overlay(bottomleft, "timestep", "%.5f" % self.model.opt.timestep)
+
+
+class MujocoRenderer:
+    """This is the MuJoCo renderer manager class for every MuJoCo environment.
+
+    The class has two main public methods available:
+    - :meth:`render` - Renders the environment in three possible modes: "human", "rgb_array", or "depth_array"
+    - :meth:`close` - Closes all contexts initialized with the renderer
+
+    """
+
+    def __init__(
+        self,
+        model: "mujoco.MjModel",
+        data: "mujoco.MjData",
+        default_cam_config: Optional[dict] = None,
+    ):
+        """A wrapper for clipping continuous actions within the valid bound.
+
+        Args:
+            model: MjModel data structure of the MuJoCo simulation
+            data: MjData data structure of the MuJoCo simulation
+            default_cam_config: dictionary with attribute values of the viewer's default camera, https://mujoco.readthedocs.io/en/latest/XMLreference.html?highlight=camera#visual-global
+        """
+        self.model = model
+        self.data = data
+        self._viewers = {}
+        self.viewer = None
+        self.default_cam_config = default_cam_config
+
+    def render(
+        self,
+        render_mode: str,
+        camera_id: Optional[int] = None,
+        camera_name: Optional[str] = None,
+    ):
+        """Renders a frame of the simulation in a specific format and camera view.
+
+        Args:
+            render_mode: The format to render the frame, it can be: "human", "rgb_array", or "depth_array"
+            camera_id: The integer camera id from which to render the frame in the MuJoCo simulation
+            camera_name: The string name of the camera from which to render the frame in the MuJoCo simulation. This argument should not be passed if using cameara_id instead and vice versa
+
+        Returns:
+            If render_mode is "rgb_array" or "depth_arra" it returns a numpy array in the specified format. "human" render mode does not return anything.
+        """
+
+        viewer = self._get_viewer(render_mode=render_mode)
+
+        if render_mode in {
+            "rgb_array",
+            "depth_array",
+        }:
+            if camera_id is not None and camera_name is not None:
+                raise ValueError(
+                    "Both `camera_id` and `camera_name` cannot be"
+                    " specified at the same time."
+                )
+
+            no_camera_specified = camera_name is None and camera_id is None
+            if no_camera_specified:
+                camera_name = "track"
+
+            if camera_id is None:
+                camera_id = mujoco.mj_name2id(
+                    self.model,
+                    mujoco.mjtObj.mjOBJ_CAMERA,
+                    camera_name,
+                )
+
+            img = viewer.render(render_mode=render_mode, camera_id=camera_id)
+            return img
+
+        elif render_mode == "human":
+            return viewer.render()
+
+    def _get_viewer(self, render_mode: str):
+        """Initializes and returns a viewer class depending on the render_mode
+        - `WindowViewer` class for "human" render mode
+        - `OffScreenViewer` class for "rgb_array" or "depth_array" render mode
+        """
+        self.viewer = self._viewers.get(render_mode)
+        if self.viewer is None:
+            if render_mode == "human":
+                self.viewer = WindowViewer(self.model, self.data)
+
+            elif render_mode in {"rgb_array", "depth_array"}:
+                self.viewer = OffScreenViewer(self.model, self.data)
+            else:
+                raise AttributeError(
+                    f"Unexpected mode: {render_mode}, expected modes: human, rgb_array, or depth_array"
+                )
+            # Add default camera parameters
+            self._set_cam_config()
+            self._viewers[render_mode] = self.viewer
+
+        if len(self._viewers.keys()) > 1:
+            # Only one context can be current at a time
+            self.viewer.make_context_current()
+
+        return self.viewer
+
+    def _set_cam_config(self):
+        """Set the default camera parameters"""
+        assert self.viewer is not None
+        if self.default_cam_config is not None:
+            for key, value in self.default_cam_config.items():
+                if isinstance(value, np.ndarray):
+                    getattr(self.viewer.cam, key)[:] = value
+                else:
+                    setattr(self.viewer.cam, key, value)
+
+    def close(self):
+        """Close the OpenGL rendering contexts of all viewer modes"""
+        for _, viewer in self._viewers.items():
+            viewer.close()
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/pusher.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/pusher.py
@ -0,0 +1,84 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class PusherEnv(MuJocoPyEnv, utils.EzPickle):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 20,
+    }
+
+    def __init__(self, **kwargs):
+        utils.EzPickle.__init__(self, **kwargs)
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64)
+        MuJocoPyEnv.__init__(
+            self, "pusher.xml", 5, observation_space=observation_space, **kwargs
+        )
+
+    def step(self, a):
+        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
+        vec_2 = self.get_body_com("object") - self.get_body_com("goal")
+
+        reward_near = -np.linalg.norm(vec_1)
+        reward_dist = -np.linalg.norm(vec_2)
+        reward_ctrl = -np.square(a).sum()
+        reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
+
+        self.do_simulation(a, self.frame_skip)
+        if self.render_mode == "human":
+            self.render()
+
+        ob = self._get_obs()
+        return (
+            ob,
+            reward,
+            False,
+            False,
+            dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
+        )
+
+    def viewer_setup(self):
+        assert self.viewer is not None
+        self.viewer.cam.trackbodyid = -1
+        self.viewer.cam.distance = 4.0
+
+    def reset_model(self):
+        qpos = self.init_qpos
+
+        self.goal_pos = np.asarray([0, 0])
+        while True:
+            self.cylinder_pos = np.concatenate(
+                [
+                    self.np_random.uniform(low=-0.3, high=0, size=1),
+                    self.np_random.uniform(low=-0.2, high=0.2, size=1),
+                ]
+            )
+            if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
+                break
+
+        qpos[-4:-2] = self.cylinder_pos
+        qpos[-2:] = self.goal_pos
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=-0.005, high=0.005, size=self.model.nv
+        )
+        qvel[-4:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate(
+            [
+                self.sim.data.qpos.flat[:7],
+                self.sim.data.qvel.flat[:7],
+                self.get_body_com("tips_arm"),
+                self.get_body_com("object"),
+                self.get_body_com("goal"),
+            ]
+        )
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/pusher_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/pusher_v4.py
@ -0,0 +1,217 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": -1,
+    "distance": 4.0,
+}
+
+
+class PusherEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+    "Pusher" is a multi-jointed robot arm which is very similar to that of a human.
+     The goal is to move a target cylinder (called *object*) to a goal position using the robot's end effector (called *fingertip*).
+      The robot consists of shoulder, elbow, forearm, and wrist joints.
+
+    ## Action Space
+    The action space is a `Box(-2, 2, (7,), float32)`. An action `(a, b)` represents the torques applied at the hinge joints.
+
+    | Num | Action                                                             | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit         |
+    |-----|--------------------------------------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
+    | 0    | Rotation of the panning the shoulder                              | -2          | 2           | r_shoulder_pan_joint             | hinge | torque (N m) |
+    | 1    | Rotation of the shoulder lifting joint                            | -2          | 2           | r_shoulder_lift_joint            | hinge | torque (N m) |
+    | 2    | Rotation of the shoulder rolling joint                            | -2          | 2           | r_upper_arm_roll_joint           | hinge | torque (N m) |
+    | 3    | Rotation of hinge joint that flexed the elbow                     | -2          | 2           | r_elbow_flex_joint               | hinge | torque (N m) |
+    | 4    | Rotation of hinge that rolls the forearm                          | -2          | 2           | r_forearm_roll_joint             | hinge | torque (N m) |
+    | 5    | Rotation of flexing the wrist                                     | -2          | 2           | r_wrist_flex_joint               | hinge | torque (N m) |
+    | 6    | Rotation of rolling the wrist                                     | -2          | 2           | r_wrist_roll_joint               | hinge | torque (N m) |
+
+    ## Observation Space
+
+    Observations consist of
+
+    - Angle of rotational joints on the pusher
+    - Angular velocities of rotational joints on the pusher
+    - The coordinates of the fingertip of the pusher
+    - The coordinates of the object to be moved
+    - The coordinates of the goal position
+
+    The observation is a `Box(-Inf, Inf, (23,), float64)` where the elements correspond to the table below.
+    An analogy can be drawn to a human arm in order to help understand the state space, with the words flex and roll meaning the
+    same as human joints.
+
+    | Num | Observation                                              | Min  | Max | Name (in corresponding XML file) | Joint    | Unit                     |
+    | --- | -------------------------------------------------------- | ---- | --- | -------------------------------- | -------- | ------------------------ |
+    | 0   | Rotation of the panning the shoulder                     | -Inf | Inf | r_shoulder_pan_joint             | hinge    | angle (rad)              |
+    | 1   | Rotation of the shoulder lifting joint                   | -Inf | Inf | r_shoulder_lift_joint            | hinge    | angle (rad)              |
+    | 2   | Rotation of the shoulder rolling joint                   | -Inf | Inf | r_upper_arm_roll_joint           | hinge    | angle (rad)              |
+    | 3   | Rotation of hinge joint that flexed the elbow            | -Inf | Inf | r_elbow_flex_joint               | hinge    | angle (rad)              |
+    | 4   | Rotation of hinge that rolls the forearm                 | -Inf | Inf | r_forearm_roll_joint             | hinge    | angle (rad)              |
+    | 5   | Rotation of flexing the wrist                            | -Inf | Inf | r_wrist_flex_joint               | hinge    | angle (rad)              |
+    | 6   | Rotation of rolling the wrist                            | -Inf | Inf | r_wrist_roll_joint               | hinge    | angle (rad)              |
+    | 7   | Rotational velocity of the panning the shoulder          | -Inf | Inf | r_shoulder_pan_joint             | hinge    | angular velocity (rad/s) |
+    | 8   | Rotational velocity of the shoulder lifting joint        | -Inf | Inf | r_shoulder_lift_joint            | hinge    | angular velocity (rad/s) |
+    | 9   | Rotational velocity of the shoulder rolling joint        | -Inf | Inf | r_upper_arm_roll_joint           | hinge    | angular velocity (rad/s) |
+    | 10  | Rotational velocity of hinge joint that flexed the elbow | -Inf | Inf | r_elbow_flex_joint               | hinge    | angular velocity (rad/s) |
+    | 11  | Rotational velocity of hinge that rolls the forearm      | -Inf | Inf | r_forearm_roll_joint             | hinge    | angular velocity (rad/s) |
+    | 12  | Rotational velocity of flexing the wrist                 | -Inf | Inf | r_wrist_flex_joint               | hinge    | angular velocity (rad/s) |
+    | 13  | Rotational velocity of rolling the wrist                 | -Inf | Inf | r_wrist_roll_joint               | hinge    | angular velocity (rad/s) |
+    | 14  | x-coordinate of the fingertip of the pusher              | -Inf | Inf | tips_arm                         | slide    | position (m)             |
+    | 15  | y-coordinate of the fingertip of the pusher              | -Inf | Inf | tips_arm                         | slide    | position (m)             |
+    | 16  | z-coordinate of the fingertip of the pusher              | -Inf | Inf | tips_arm                         | slide    | position (m)             |
+    | 17  | x-coordinate of the object to be moved                   | -Inf | Inf | object (obj_slidex)              | slide    | position (m)             |
+    | 18  | y-coordinate of the object to be moved                   | -Inf | Inf | object (obj_slidey)              | slide    | position (m)             |
+    | 19  | z-coordinate of the object to be moved                   | -Inf | Inf | object                           | cylinder | position (m)             |
+    | 20  | x-coordinate of the goal position of the object          | -Inf | Inf | goal (goal_slidex)               | slide    | position (m)             |
+    | 21  | y-coordinate of the goal position of the object          | -Inf | Inf | goal (goal_slidey)               | slide    | position (m)             |
+    | 22  | z-coordinate of the goal position of the object          | -Inf | Inf | goal                             | sphere   | position (m)             |
+
+
+    ## Rewards
+    The reward consists of two parts:
+    - *reward_near *: This reward is a measure of how far the *fingertip*
+    of the pusher (the unattached end) is from the object, with a more negative
+    value assigned for when the pusher's *fingertip* is further away from the
+    target. It is calculated as the negative vector norm of (position of
+    the fingertip - position of target), or *-norm("fingertip" - "target")*.
+    - *reward_dist *: This reward is a measure of how far the object is from
+    the target goal position, with a more negative value assigned for object is
+    further away from the target. It is calculated as the negative vector norm of
+    (position of the object - position of goal), or *-norm("object" - "target")*.
+    - *reward_control*: A negative reward for penalising the pusher if
+    it takes actions that are too large. It is measured as the negative squared
+    Euclidean norm of the action, i.e. as *- sum(action<sup>2</sup>)*.
+
+    The total reward returned is ***reward*** *=* *reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near*
+
+    Unlike other environments, Pusher does not allow you to specify weights for the individual reward terms.
+    However, `info` does contain the keys *reward_dist* and *reward_ctrl*. Thus, if you'd like to weight the terms,
+    you should create a wrapper that computes the weighted reward from `info`.
+
+
+    ## Starting State
+    All pusher (not including object and goal) states start in
+    (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0). A uniform noise in the range
+    [-0.005, 0.005] is added to the velocity attributes only. The velocities of
+    the object and goal are permanently set to 0. The object's x-position is selected uniformly
+    between [-0.3, 0] while the y-position is selected uniformly between [-0.2, 0.2], and this
+    process is repeated until the vector norm between the object's (x,y) position and origin is not greater
+    than 0.17. The goal always have the same position of (0.45, -0.05, -0.323).
+
+    The default framerate is 5 with each frame lasting for 0.01, giving rise to a *dt = 5 * 0.01 = 0.05*
+
+    ## Episode End
+
+    The episode ends when any of the following happens:
+
+    1. Truncation: The episode duration reaches a 100 timesteps.
+    2. Termination: Any of the state space values is no longer finite.
+
+    ## Arguments
+
+    No additional arguments are currently supported (in v2 and lower),
+    but modifications can be made to the XML file in the assets folder
+    (or by changing the path to a modified XML file in another folder)..
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Pusher-v4')
+    ```
+
+    There is no v3 for Pusher, unlike the robot environments where a v3 and
+    beyond take `gymnasmium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Pusher-v2')
+    ```
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments.
+    * v0: Initial versions release (1.0.0)
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 20,
+    }
+
+    def __init__(self, **kwargs):
+        utils.EzPickle.__init__(self, **kwargs)
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64)
+        MujocoEnv.__init__(
+            self,
+            "pusher.xml",
+            5,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+
+    def step(self, a):
+        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
+        vec_2 = self.get_body_com("object") - self.get_body_com("goal")
+
+        reward_near = -np.linalg.norm(vec_1)
+        reward_dist = -np.linalg.norm(vec_2)
+        reward_ctrl = -np.square(a).sum()
+        reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
+
+        self.do_simulation(a, self.frame_skip)
+        if self.render_mode == "human":
+            self.render()
+
+        ob = self._get_obs()
+        return (
+            ob,
+            reward,
+            False,
+            False,
+            dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
+        )
+
+    def reset_model(self):
+        qpos = self.init_qpos
+
+        self.goal_pos = np.asarray([0, 0])
+        while True:
+            self.cylinder_pos = np.concatenate(
+                [
+                    self.np_random.uniform(low=-0.3, high=0, size=1),
+                    self.np_random.uniform(low=-0.2, high=0.2, size=1),
+                ]
+            )
+            if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
+                break
+
+        qpos[-4:-2] = self.cylinder_pos
+        qpos[-2:] = self.goal_pos
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=-0.005, high=0.005, size=self.model.nv
+        )
+        qvel[-4:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate(
+            [
+                self.data.qpos.flat[:7],
+                self.data.qvel.flat[:7],
+                self.get_body_com("tips_arm"),
+                self.get_body_com("object"),
+                self.get_body_com("goal"),
+            ]
+        )
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/reacher.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/reacher.py
@ -0,0 +1,75 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 50,
+    }
+
+    def __init__(self, **kwargs):
+        utils.EzPickle.__init__(self, **kwargs)
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
+        MuJocoPyEnv.__init__(
+            self, "reacher.xml", 2, observation_space=observation_space, **kwargs
+        )
+
+    def step(self, a):
+        vec = self.get_body_com("fingertip") - self.get_body_com("target")
+        reward_dist = -np.linalg.norm(vec)
+        reward_ctrl = -np.square(a).sum()
+        reward = reward_dist + reward_ctrl
+
+        self.do_simulation(a, self.frame_skip)
+        if self.render_mode == "human":
+            self.render()
+
+        ob = self._get_obs()
+        return (
+            ob,
+            reward,
+            False,
+            False,
+            dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
+        )
+
+    def viewer_setup(self):
+        assert self.viewer is not None
+        self.viewer.cam.trackbodyid = 0
+
+    def reset_model(self):
+        qpos = (
+            self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
+            + self.init_qpos
+        )
+        while True:
+            self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
+            if np.linalg.norm(self.goal) < 0.2:
+                break
+        qpos[-2:] = self.goal
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=-0.005, high=0.005, size=self.model.nv
+        )
+        qvel[-2:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        theta = self.sim.data.qpos.flat[:2]
+        return np.concatenate(
+            [
+                np.cos(theta),
+                np.sin(theta),
+                self.sim.data.qpos.flat[2:],
+                self.sim.data.qvel.flat[:2],
+                self.get_body_com("fingertip") - self.get_body_com("target"),
+            ]
+        )
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/reacher_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/reacher_v4.py
@ -0,0 +1,190 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+# Camera attributes passed to MujocoEnv through `default_camera_config`.
+DEFAULT_CAMERA_CONFIG = {"trackbodyid": 0}
+
+
+class ReacherEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+    "Reacher" is a two-jointed robot arm. The goal is to move the robot's end effector (called *fingertip*) close to a
+    target that is spawned at a random position.
+
+    ## Action Space
+    The action space is a `Box(-1, 1, (2,), float32)`. An action `(a, b)` represents the torques applied at the hinge joints.
+
+    | Num | Action                                                                          | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+    |-----|---------------------------------------------------------------------------------|-------------|-------------|--------------------------|-------|------|
+    | 0   | Torque applied at the first hinge (connecting the link to the point of fixture) | -1 | 1 | joint0  | hinge | torque (N m) |
+    | 1   |  Torque applied at the second hinge (connecting the two links)                  | -1 | 1 | joint1  | hinge | torque (N m) |
+
+    ## Observation Space
+    Observations consist of
+
+    - The cosine of the angles of the two arms
+    - The sine of the angles of the two arms
+    - The coordinates of the target
+    - The angular velocities of the arms
+    - The vector between the target and the reacher's fingertip (3 dimensional with the last element being 0)
+
+    The observation is a `Box(-Inf, Inf, (11,), float64)` where the elements correspond to the following:
+
+    | Num | Observation                                                                                    | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
+    | --- | ---------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
+    | 0   | cosine of the angle of the first arm                                                           | -Inf | Inf | cos(joint0)                      | hinge | unitless                 |
+    | 1   | cosine of the angle of the second arm                                                          | -Inf | Inf | cos(joint1)                      | hinge | unitless                 |
+    | 2   | sine of the angle of the first arm                                                             | -Inf | Inf | sin(joint0)                      | hinge | unitless                 |
+    | 3   | sine of the angle of the second arm                                                            | -Inf | Inf | sin(joint1)                      | hinge | unitless                 |
+    | 4   | x-coordinate of the target                                                                     | -Inf | Inf | target_x                         | slide | position (m)             |
+    | 5   | y-coordinate of the target                                                                     | -Inf | Inf | target_y                         | slide | position (m)             |
+    | 6   | angular velocity of the first arm                                                              | -Inf | Inf | joint0                           | hinge | angular velocity (rad/s) |
+    | 7   | angular velocity of the second arm                                                             | -Inf | Inf | joint1                           | hinge | angular velocity (rad/s) |
+    | 8   | x-value of position_fingertip - position_target                                                | -Inf | Inf | NA                               | slide | position (m)             |
+    | 9   | y-value of position_fingertip - position_target                                                | -Inf | Inf | NA                               | slide | position (m)             |
+    | 10  | z-value of position_fingertip - position_target (constantly 0 since reacher is 2d and z is same for both) | -Inf | Inf | NA                               | slide | position (m)             |
+
+
+    Most Gym environments just return the positions and velocity of the
+    joints in the `.xml` file as the state of the environment. However, in
+    reacher the state is created by combining only certain elements of the
+    position and velocity, and performing some function transformations on them.
+    If one is to read the `.xml` for reacher then they will find 4 joints:
+
+    | Num | Observation                 | Min      | Max      | Name (in corresponding XML file) | Joint | Unit               |
+    |-----|-----------------------------|----------|----------|----------------------------------|-------|--------------------|
+    | 0   | angle of the first arm      | -Inf     | Inf      | joint0                           | hinge | angle (rad)        |
+    | 1   | angle of the second arm     | -Inf     | Inf      | joint1                           | hinge | angle (rad)        |
+    | 2   | x-coordinate of the target  | -Inf     | Inf      | target_x                         | slide | position (m)       |
+    | 3   | y-coordinate of the target  | -Inf     | Inf      | target_y                         | slide | position (m)       |
+
+
+    ## Rewards
+    The reward consists of two parts:
+    - *reward_distance*: This reward is a measure of how far the *fingertip*
+    of the reacher (the unattached end) is from the target, with a more negative
+    value assigned for when the reacher's *fingertip* is further away from the
+    target. It is calculated as the negative vector norm of (position of
+    the fingertip - position of target), or *-norm("fingertip" - "target")*.
+    - *reward_control*: A negative reward for penalising the walker if
+    it takes actions that are too large. It is measured as the negative squared
+    Euclidean norm of the action, i.e. as *- sum(action<sup>2</sup>)*.
+
+    The total reward returned is ***reward*** *=* *reward_distance + reward_control*
+
+    Unlike other environments, Reacher does not allow you to specify weights for the individual reward terms.
+    However, `info` does contain the keys *reward_dist* and *reward_ctrl*. Thus, if you'd like to weight the terms,
+    you should create a wrapper that computes the weighted reward from `info`.
+
+
+    ## Starting State
+    All observations start in state
+    (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+    with a noise added for stochasticity. A uniform noise in the range
+    [-0.1, 0.1] is added to the positional attributes, while the target position
+    is selected uniformly at random in a disk of radius 0.2 around the origin.
+    Independent, uniform noise in the
+    range of [-0.005, 0.005] is added to the velocities, and the last
+    element ("fingertip" - "target") is calculated at the end once everything
+    is set. The default setting has a framerate of 2 and a *dt = 2 * 0.01 = 0.02*
+
+    ## Episode End
+
+    The episode ends when any of the following happens:
+
+    1. Truncation: The episode duration reaches a 50 timesteps (with a new random target popping up if the reacher's fingertip reaches it before 50 timesteps)
+    2. Termination: Any of the state space values is no longer finite.
+
+    ## Arguments
+
+    No additional arguments are currently supported (in v2 and lower),
+    but modifications can be made to the XML file in the assets folder
+    (or by changing the path to a modified XML file in another folder)..
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Reacher-v4')
+    ```
+
+    There is no v3 for Reacher, unlike the robot environments where a v3 and
+    beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments.
+    * v0: Initial versions release (1.0.0)
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 50,
+    }
+
+    def __init__(self, **kwargs):
+        utils.EzPickle.__init__(self, **kwargs)
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
+        MujocoEnv.__init__(
+            self,
+            "reacher.xml",
+            2,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+
+    def step(self, a):
+        vec = self.get_body_com("fingertip") - self.get_body_com("target")
+        reward_dist = -np.linalg.norm(vec)
+        reward_ctrl = -np.square(a).sum()
+        reward = reward_dist + reward_ctrl
+
+        self.do_simulation(a, self.frame_skip)
+        if self.render_mode == "human":
+            self.render()
+
+        ob = self._get_obs()
+        return (
+            ob,
+            reward,
+            False,
+            False,
+            dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
+        )
+
+    def reset_model(self):
+        qpos = (
+            self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
+            + self.init_qpos
+        )
+        while True:
+            self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
+            if np.linalg.norm(self.goal) < 0.2:
+                break
+        qpos[-2:] = self.goal
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=-0.005, high=0.005, size=self.model.nv
+        )
+        qvel[-2:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        theta = self.data.qpos.flat[:2]
+        return np.concatenate(
+            [
+                np.cos(theta),
+                np.sin(theta),
+                self.data.qpos.flat[2:],
+                self.data.qvel.flat[:2],
+                self.get_body_com("fingertip") - self.get_body_com("target"),
+            ]
+        )
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/swimmer.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/swimmer.py
@ -0,0 +1,59 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 25,
+    }
+
+    def __init__(self, **kwargs):
+        observation_space = Box(low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64)
+        MuJocoPyEnv.__init__(
+            self, "swimmer.xml", 4, observation_space=observation_space, **kwargs
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def step(self, a):
+        ctrl_cost_coeff = 0.0001
+        xposbefore = self.sim.data.qpos[0]
+        self.do_simulation(a, self.frame_skip)
+        xposafter = self.sim.data.qpos[0]
+
+        reward_fwd = (xposafter - xposbefore) / self.dt
+        reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
+        reward = reward_fwd + reward_ctrl
+        ob = self._get_obs()
+
+        if self.render_mode == "human":
+            self.render()
+
+        return (
+            ob,
+            reward,
+            False,
+            False,
+            dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl),
+        )
+
+    def _get_obs(self):
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
+        return np.concatenate([qpos.flat[2:], qvel.flat])
+
+    def reset_model(self):
+        self.set_state(
+            self.init_qpos
+            + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
+            self.init_qvel
+            + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nv),
+        )
+        return self._get_obs()
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/swimmer_v3.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/swimmer_v3.py
@ -0,0 +1,129 @@
+__credits__ = ["Rushiv Arora"]
+
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+# Camera attribute overrides applied in `SwimmerEnv.viewer_setup`; empty, so none are set.
+DEFAULT_CAMERA_CONFIG = {}
+
+
+class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 25,
+    }
+
+    def __init__(
+        self,
+        xml_file="swimmer.xml",
+        forward_reward_weight=1.0,
+        ctrl_cost_weight=1e-4,
+        reset_noise_scale=0.1,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        utils.EzPickle.__init__(
+            self,
+            xml_file,
+            forward_reward_weight,
+            ctrl_cost_weight,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        self._forward_reward_weight = forward_reward_weight
+        self._ctrl_cost_weight = ctrl_cost_weight
+
+        self._reset_noise_scale = reset_noise_scale
+
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        if exclude_current_positions_from_observation:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64
+            )
+        else:
+            observation_space = Box(
+                low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64
+            )
+
+        MuJocoPyEnv.__init__(
+            self, xml_file, 4, observation_space=observation_space, **kwargs
+        )
+
+    def control_cost(self, action):
+        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
+        return control_cost
+
+    def step(self, action):
+        xy_position_before = self.sim.data.qpos[0:2].copy()
+        self.do_simulation(action, self.frame_skip)
+        xy_position_after = self.sim.data.qpos[0:2].copy()
+
+        xy_velocity = (xy_position_after - xy_position_before) / self.dt
+        x_velocity, y_velocity = xy_velocity
+
+        forward_reward = self._forward_reward_weight * x_velocity
+        ctrl_cost = self.control_cost(action)
+
+        observation = self._get_obs()
+        reward = forward_reward - ctrl_cost
+        info = {
+            "reward_fwd": forward_reward,
+            "reward_ctrl": -ctrl_cost,
+            "x_position": xy_position_after[0],
+            "y_position": xy_position_after[1],
+            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
+            "x_velocity": x_velocity,
+            "y_velocity": y_velocity,
+            "forward_reward": forward_reward,
+        }
+
+        if self.render_mode == "human":
+            self.render()
+
+        return observation, reward, False, False, info
+
+    def _get_obs(self):
+        position = self.sim.data.qpos.flat.copy()
+        velocity = self.sim.data.qvel.flat.copy()
+
+        if self._exclude_current_positions_from_observation:
+            position = position[2:]
+
+        observation = np.concatenate([position, velocity]).ravel()
+        return observation
+
+    def reset_model(self):
+        noise_low = -self._reset_noise_scale
+        noise_high = self._reset_noise_scale
+
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nq
+        )
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nv
+        )
+
+        self.set_state(qpos, qvel)
+
+        observation = self._get_obs()
+        return observation
+
+    def viewer_setup(self):
+        assert self.viewer is not None
+        for key, value in DEFAULT_CAMERA_CONFIG.items():
+            if isinstance(value, np.ndarray):
+                getattr(self.viewer.cam, key)[:] = value
+            else:
+                setattr(self.viewer.cam, key, value)
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/swimmer_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/swimmer_v4.py
@ -0,0 +1,232 @@
+__credits__ = ["Rushiv Arora"]
+
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+class SwimmerEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+
+    This environment corresponds to the Swimmer environment described in Rémi Coulom's PhD thesis
+    ["Reinforcement Learning Using Neural Networks, with Applications to Motor Control"](https://tel.archives-ouvertes.fr/tel-00003985/document).
+    The environment aims to increase the number of independent state and control
+    variables as compared to the classic control environments. The swimmers
+    consist of three or more segments ('***links***') and one less articulation
+    joints ('***rotors***') - one rotor joint connecting exactly two links to
+    form a linear chain. The swimmer is suspended in a two dimensional pool and
+    always starts in the same position (subject to some deviation drawn from a
+    uniform distribution), and the goal is to move as fast as possible towards
+    the right by applying torque on the rotors and using the fluid's friction.
+
+    ## Notes
+
+    The problem parameters are:
+    * *n*: number of body parts
+    * *m<sub>i</sub>*: mass of part *i* (*i* ∈ {1...n})
+    * *l<sub>i</sub>*: length of part *i* (*i* ∈ {1...n})
+    * *k*: viscous-friction coefficient
+
+    The default environment has *n* = 3, *l<sub>i</sub>* = 0.1,
+    and *k* = 0.1, but it is possible to pass a custom MuJoCo XML file (`xml_file`) during construction
+    to increase the number of links, or to tweak any of the parameters.
+    Note that the declared observation space is fixed at 8 (or 10) entries, while observations are built from
+    all of `qpos` and `qvel`; a model with a different number of joints will therefore produce observations
+    whose length differs from the declared space.
+
+    ## Action Space
+    The action space is a `Box(-1, 1, (2,), float32)`. An action represents the torques applied between *links*
+
+    | Num | Action                             | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit         |
+    |-----|------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
+    | 0   | Torque applied on the first rotor  | -1          | 1           | motor1_rot                       | hinge | torque (N m) |
+    | 1   | Torque applied on the second rotor | -1          | 1           | motor2_rot                       | hinge | torque (N m) |
+
+    ## Observation Space
+    By default, observations consist of:
+    * θ<sub>i</sub>: angle of part *i* with respect to the *x* axis
+    * θ<sub>i</sub>': its derivative with respect to time (angular velocity)
+
+    In the default case, observations do not include the x- and y-coordinates of the front tip. These may
+    be included by passing `exclude_current_positions_from_observation=False` during construction.
+    Then, the observation space will be `Box(-Inf, Inf, (10,), float64)` where the first two observations
+    represent the x- and y-coordinates of the front tip.
+    Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
+    will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
+
+    By default, the observation is a `Box(-Inf, Inf, (8,), float64)` where the elements correspond to the following:
+
+    | Num | Observation                          | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
+    | --- | ------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
+    | 0   | angle of the front tip               | -Inf | Inf | free_body_rot                    | hinge | angle (rad)              |
+    | 1   | angle of the first rotor             | -Inf | Inf | motor1_rot                       | hinge | angle (rad)              |
+    | 2   | angle of the second rotor            | -Inf | Inf | motor2_rot                       | hinge | angle (rad)              |
+    | 3   | velocity of the tip along the x-axis | -Inf | Inf | slider1                          | slide | velocity (m/s)           |
+    | 4   | velocity of the tip along the y-axis | -Inf | Inf | slider2                          | slide | velocity (m/s)           |
+    | 5   | angular velocity of front tip        | -Inf | Inf | free_body_rot                    | hinge | angular velocity (rad/s) |
+    | 6   | angular velocity of first rotor      | -Inf | Inf | motor1_rot                       | hinge | angular velocity (rad/s) |
+    | 7   | angular velocity of second rotor     | -Inf | Inf | motor2_rot                       | hinge | angular velocity (rad/s) |
+    | excluded | position of the tip along the x-axis | -Inf | Inf | slider1                          | slide | position (m)           |
+    | excluded | position of the tip along the y-axis | -Inf | Inf | slider2                          | slide | position (m)           |
+
+    ## Rewards
+    The reward consists of two parts:
+    - *forward_reward*: A reward of moving forward which is measured
+    as *`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*. *dt* is
+    the time between actions and is dependent on the frame_skip parameter
+    (default is 4), where the frametime is 0.01 - making the
+    default *dt = 4 * 0.01 = 0.04*. This reward would be positive if the swimmer
+    swims right as desired.
+    - *ctrl_cost*: A cost for penalising the swimmer if it takes
+    actions that are too large. It is measured as *`ctrl_cost_weight` *
+    sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
+    control and has a default value of 1e-4
+
+    The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
+
+    ## Starting State
+    All observations start in state (0,0,0,0,0,0,0,0), with uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the initial state for stochasticity.
+
+    ## Episode End
+    The episode truncates when the episode length is greater than 1000.
+
+    ## Arguments
+
+    No additional arguments are currently supported in v2 and lower.
+
+    ```python
+    import gymnasium as gym
+    gym.make('Swimmer-v4')
+    ```
+
+    v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Swimmer-v4', ctrl_cost_weight=0.1, ....)
+    ```
+
+    | Parameter                                    | Type      | Default         | Description                                                                                                                                                               |
+    | -------------------------------------------- | --------- | --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+    | `xml_file`                                   | **str**   | `"swimmer.xml"` | Path to a MuJoCo model                                                                                                                                                    |
+    | `forward_reward_weight`                      | **float** | `1.0`           | Weight for _forward_reward_ term (see section on reward)                                                                                                                  |
+    | `ctrl_cost_weight`                           | **float** | `1e-4`          | Weight for _ctrl_cost_ term (see section on reward)                                                                                                                       |
+    | `reset_noise_scale`                          | **float** | `0.1`           | Scale of random perturbations of initial position and velocity (see section on Starting State)                                                                            |
+    | `exclude_current_positions_from_observation` | **bool**  | `True`          | Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+    * v0: Initial versions release (1.0.0)
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 25,
+    }
+
+    def __init__(
+        self,
+        forward_reward_weight=1.0,
+        ctrl_cost_weight=1e-4,
+        reset_noise_scale=0.1,
+        exclude_current_positions_from_observation=True,
+        xml_file="swimmer.xml",
+        **kwargs,
+    ):
+        """Create the environment.
+
+        ``xml_file`` is placed after the pre-existing parameters so that
+        positional calls written against the old signature keep their meaning.
+        Remaining ``kwargs`` are forwarded unchanged to ``MujocoEnv.__init__``.
+        """
+        # EzPickle is given the same arguments the constructor received.
+        utils.EzPickle.__init__(
+            self,
+            forward_reward_weight,
+            ctrl_cost_weight,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            xml_file,
+            **kwargs,
+        )
+
+        self._forward_reward_weight = forward_reward_weight
+        self._ctrl_cost_weight = ctrl_cost_weight
+
+        self._reset_noise_scale = reset_noise_scale
+
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        # 8 entries without the x/y position of the tip, 10 with it.
+        # NOTE(review): these sizes match the default model only; a custom
+        # `xml_file` with a different joint count yields observations of a
+        # different length than declared here.
+        obs_size = 8 if exclude_current_positions_from_observation else 10
+        observation_space = Box(
+            low=-np.inf, high=np.inf, shape=(obs_size,), dtype=np.float64
+        )
+
+        MujocoEnv.__init__(
+            self, xml_file, 4, observation_space=observation_space, **kwargs
+        )
+
+    def control_cost(self, action):
+        """Return ``ctrl_cost_weight`` times the sum of squared action entries."""
+        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
+        return control_cost
+
+    def step(self, action):
+        """Advance the simulation by ``frame_skip`` frames and score the move.
+
+        Returns ``(observation, reward, terminated, truncated, info)``; both
+        flags are always ``False`` here. ``reward`` is the weighted x-velocity
+        of the first two ``qpos`` entries minus the control cost, and ``info``
+        carries the individual reward terms plus position and velocity.
+        """
+        # Copies are required: qpos is updated in place by the simulation.
+        xy_position_before = self.data.qpos[0:2].copy()
+        self.do_simulation(action, self.frame_skip)
+        xy_position_after = self.data.qpos[0:2].copy()
+
+        # Finite-difference velocity: (after - before) / dt.
+        xy_velocity = (xy_position_after - xy_position_before) / self.dt
+        x_velocity, y_velocity = xy_velocity
+
+        forward_reward = self._forward_reward_weight * x_velocity
+
+        ctrl_cost = self.control_cost(action)
+
+        observation = self._get_obs()
+        reward = forward_reward - ctrl_cost
+        info = {
+            "reward_fwd": forward_reward,
+            "reward_ctrl": -ctrl_cost,
+            "x_position": xy_position_after[0],
+            "y_position": xy_position_after[1],
+            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
+            "x_velocity": x_velocity,
+            "y_velocity": y_velocity,
+            "forward_reward": forward_reward,
+        }
+
+        if self.render_mode == "human":
+            self.render()
+
+        return observation, reward, False, False, info
+
+    def _get_obs(self):
+        """Return ``qpos`` followed by ``qvel``, optionally without the first two ``qpos`` entries."""
+        position = self.data.qpos.flat.copy()
+        velocity = self.data.qvel.flat.copy()
+
+        if self._exclude_current_positions_from_observation:
+            position = position[2:]
+
+        observation = np.concatenate([position, velocity]).ravel()
+        return observation
+
+    def reset_model(self):
+        """Reset to ``init_qpos``/``init_qvel`` plus uniform noise and return the observation."""
+        noise_low = -self._reset_noise_scale
+        noise_high = self._reset_noise_scale
+
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nq
+        )
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=noise_low, high=noise_high, size=self.model.nv
+        )
+
+        self.set_state(qpos, qvel)
+
+        observation = self._get_obs()
+        return observation
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/walker2d.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/walker2d.py
@ -0,0 +1,61 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
+    """Walker2d environment on top of ``MuJocoPyEnv`` with fixed reward settings.
+
+    Each step is rewarded with the forward x-velocity plus an alive bonus of
+    1.0, minus ``1e-3`` times the sum of squared actions. The episode
+    terminates once the height leaves (0.8, 2.0) or the angle leaves (-1.0, 1.0).
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array", "depth_array"],
+        "render_fps": 125,
+    }
+
+    def __init__(self, **kwargs):
+        """Load ``walker2d.xml`` with a frame skip of 4 and a 17-entry observation space."""
+        obs_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64)
+        MuJocoPyEnv.__init__(
+            self, "walker2d.xml", 4, observation_space=obs_space, **kwargs
+        )
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def step(self, a):
+        """Apply action ``a`` and return ``(ob, reward, terminated, False, {})``."""
+        x_before = self.sim.data.qpos[0]
+        self.do_simulation(a, self.frame_skip)
+        x_after, height, ang = self.sim.data.qpos[0:3]
+
+        # Forward velocity, then the alive bonus, then the control penalty.
+        reward = (x_after - x_before) / self.dt
+        reward += 1.0
+        reward -= 1e-3 * np.square(a).sum()
+
+        within_limits = 0.8 < height < 2.0 and -1.0 < ang < 1.0
+        terminated = not within_limits
+        ob = self._get_obs()
+
+        if self.render_mode == "human":
+            self.render()
+
+        return ob, reward, terminated, False, {}
+
+    def _get_obs(self):
+        """Return ``qpos`` without its first entry, followed by ``qvel`` clipped to [-10, 10]."""
+        sim_data = self.sim.data
+        clipped_qvel = np.clip(sim_data.qvel, -10, 10)
+        return np.concatenate([sim_data.qpos[1:], clipped_qvel]).ravel()
+
+    def reset_model(self):
+        """Reset to the initial state plus uniform noise in [-0.005, 0.005] and return the observation."""
+        # Position noise is sampled first, velocity noise second.
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=-0.005, high=0.005, size=self.model.nq
+        )
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=-0.005, high=0.005, size=self.model.nv
+        )
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        """Configure the viewer camera: tracked body, distance, look-at height and elevation."""
+        assert self.viewer is not None
+        self.viewer.cam.trackbodyid = 2
+        half_extent = self.model.stat.extent * 0.5
+        self.viewer.cam.distance = half_extent
+        self.viewer.cam.lookat[2] = 1.15
+        self.viewer.cam.elevation = -20
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/walker2d_v3.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/walker2d_v3.py
@ -0,0 +1,168 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MuJocoPyEnv
+from gymnasium.spaces import Box
+
+
+# Camera attributes that Walker2dEnv.viewer_setup copies onto the viewer's
+# camera: scalar values are assigned with setattr, while the ndarray value
+# ("lookat") is written into the camera's existing array in place.
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": 2,
+    "distance": 4.0,
+    "lookat": np.array((0.0, 0.0, 1.15)),
+    "elevation": -20.0,
+}
+
+
+class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
+    """Walker2d environment on top of ``MuJocoPyEnv`` with configurable reward and health settings.
+
+    The step reward is ``forward_reward_weight * x_velocity + healthy_reward
+    - ctrl_cost_weight * sum(action ** 2)``. The walker is healthy while the
+    second and third ``qpos`` entries (read as ``z`` and ``angle``) lie strictly
+    inside ``healthy_z_range`` and ``healthy_angle_range``.
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array", "depth_array"],
+        "render_fps": 125,
+    }
+
+    def __init__(
+        self,
+        xml_file="walker2d.xml",
+        forward_reward_weight=1.0,
+        ctrl_cost_weight=1e-3,
+        healthy_reward=1.0,
+        terminate_when_unhealthy=True,
+        healthy_z_range=(0.8, 2.0),
+        healthy_angle_range=(-1.0, 1.0),
+        reset_noise_scale=5e-3,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        """Store the reward/health settings and load the model with a frame skip of 4."""
+        utils.EzPickle.__init__(
+            self,
+            xml_file,
+            forward_reward_weight,
+            ctrl_cost_weight,
+            healthy_reward,
+            terminate_when_unhealthy,
+            healthy_z_range,
+            healthy_angle_range,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+            **kwargs,
+        )
+
+        # Reward weights.
+        self._forward_reward_weight = forward_reward_weight
+        self._ctrl_cost_weight = ctrl_cost_weight
+        self._healthy_reward = healthy_reward
+
+        # Health and termination criteria.
+        self._terminate_when_unhealthy = terminate_when_unhealthy
+        self._healthy_z_range = healthy_z_range
+        self._healthy_angle_range = healthy_angle_range
+
+        self._reset_noise_scale = reset_noise_scale
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        # One entry fewer when the leading qpos value is left out.
+        obs_dim = 17 if exclude_current_positions_from_observation else 18
+        observation_space = Box(
+            low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float64
+        )
+
+        MuJocoPyEnv.__init__(
+            self, xml_file, 4, observation_space=observation_space, **kwargs
+        )
+
+    @property
+    def healthy_reward(self):
+        """``_healthy_reward`` if the walker is healthy or termination-on-unhealthy is enabled, else zero."""
+        granted = self.is_healthy or self._terminate_when_unhealthy
+        return float(granted) * self._healthy_reward
+
+    def control_cost(self, action):
+        """Return ``ctrl_cost_weight`` times the sum of squared action entries."""
+        squared_sum = np.sum(np.square(action))
+        return self._ctrl_cost_weight * squared_sum
+
+    @property
+    def is_healthy(self):
+        """Whether ``z`` and ``angle`` both lie strictly inside their healthy ranges."""
+        z, angle = self.sim.data.qpos[1:3]
+
+        z_low, z_high = self._healthy_z_range
+        angle_low, angle_high = self._healthy_angle_range
+
+        return (z_low < z < z_high) and (angle_low < angle < angle_high)
+
+    @property
+    def terminated(self):
+        """True only when termination-on-unhealthy is enabled and the walker is unhealthy."""
+        if not self._terminate_when_unhealthy:
+            return False
+        return not self.is_healthy
+
+    def _get_obs(self):
+        """Return ``qpos`` followed by ``qvel`` clipped to [-10, 10], optionally without the first ``qpos`` entry."""
+        sim_data = self.sim.data
+        qpos = sim_data.qpos.flat.copy()
+        qvel = np.clip(sim_data.qvel.flat.copy(), -10, 10)
+
+        first_kept = 1 if self._exclude_current_positions_from_observation else 0
+        return np.concatenate((qpos[first_kept:], qvel)).ravel()
+
+    def step(self, action):
+        """Apply ``action`` and return ``(observation, reward, terminated, False, info)``."""
+        x_before = self.sim.data.qpos[0]
+        self.do_simulation(action, self.frame_skip)
+        x_after = self.sim.data.qpos[0]
+        x_velocity = (x_after - x_before) / self.dt
+
+        ctrl_cost = self.control_cost(action)
+        forward_reward = self._forward_reward_weight * x_velocity
+        healthy_reward = self.healthy_reward
+
+        observation = self._get_obs()
+        # Sum the positive terms first, then subtract the cost.
+        reward = (forward_reward + healthy_reward) - ctrl_cost
+        terminated = self.terminated
+        info = {"x_position": x_after, "x_velocity": x_velocity}
+
+        if self.render_mode == "human":
+            self.render()
+
+        return observation, reward, terminated, False, info
+
+    def reset_model(self):
+        """Reset to the initial state plus uniform noise and return the observation."""
+        scale = self._reset_noise_scale
+
+        # Position noise is sampled before velocity noise.
+        qpos_noise = self.np_random.uniform(low=-scale, high=scale, size=self.model.nq)
+        qvel_noise = self.np_random.uniform(low=-scale, high=scale, size=self.model.nv)
+
+        self.set_state(self.init_qpos + qpos_noise, self.init_qvel + qvel_noise)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        """Copy every entry of ``DEFAULT_CAMERA_CONFIG`` onto the viewer's camera."""
+        assert self.viewer is not None
+        for attr_name, setting in DEFAULT_CAMERA_CONFIG.items():
+            camera = self.viewer.cam
+            if not isinstance(setting, np.ndarray):
+                setattr(camera, attr_name, setting)
+                continue
+            # Array settings are written into the camera's existing buffer in place.
+            getattr(camera, attr_name)[:] = setting
--- a/rl/Lib/site-packages/gymnasium/envs/mujoco/walker2d_v4.py
+++ b/rl/Lib/site-packages/gymnasium/envs/mujoco/walker2d_v4.py
@ -0,0 +1,296 @@
+import numpy as np
+
+from gymnasium import utils
+from gymnasium.envs.mujoco import MujocoEnv
+from gymnasium.spaces import Box
+
+
+# Camera settings handed to MujocoEnv.__init__ as `default_camera_config`
+# by Walker2dEnv below.
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": 2,
+    "distance": 4.0,
+    "lookat": np.array((0.0, 0.0, 1.15)),
+    "elevation": -20.0,
+}
+
+
+class Walker2dEnv(MujocoEnv, utils.EzPickle):
+    """
+    ## Description
+
+    This environment builds on the [hopper](https://gymnasium.farama.org/environments/mujoco/hopper/) environment
+    by adding another set of legs making it possible for the robot to walk forward instead of
+    hop. Like other Mujoco environments, this environment aims to increase the number of independent state
+    and control variables as compared to the classic control environments. The walker is a
+    two-dimensional two-legged figure that consist of seven main body parts - a single torso at the top
+    (with the two legs splitting after the torso), two thighs in the middle below the torso, two legs
+    in the bottom below the thighs, and two feet attached to the legs on which the entire body rests.
+    The goal is to walk in the forward (right)
+    direction by applying torques on the six hinges connecting the seven body parts.
+
+    ## Action Space
+    The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.
+
+    | Num | Action                                 | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit         |
+    |-----|----------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
+    | 0   | Torque applied on the thigh rotor      | -1          | 1           | thigh_joint                      | hinge | torque (N m) |
+    | 1   | Torque applied on the leg rotor        | -1          | 1           | leg_joint                        | hinge | torque (N m) |
+    | 2   | Torque applied on the foot rotor       | -1          | 1           | foot_joint                       | hinge | torque (N m) |
+    | 3   | Torque applied on the left thigh rotor | -1          | 1           | thigh_left_joint                 | hinge | torque (N m) |
+    | 4   | Torque applied on the left leg rotor   | -1          | 1           | leg_left_joint                   | hinge | torque (N m) |
+    | 5   | Torque applied on the left foot rotor  | -1          | 1           | foot_left_joint                  | hinge | torque (N m) |
+
+    ## Observation Space
+    Observations consist of positional values of different body parts of the walker,
+    followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities.
+
+    By default, observations do not include the x-coordinate of the torso. It may
+    be included by passing `exclude_current_positions_from_observation=False` during construction.
+    In that case, the observation space will be `Box(-Inf, Inf, (18,), float64)` where the first observation
+    represent the x-coordinates of the torso of the walker.
+    Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
+    of the torso will be returned in `info` with key `"x_position"`.
+
+    By default, observation is a `Box(-Inf, Inf, (17,), float64)` where the elements correspond to the following:
+
+    | Num | Observation                                        | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
+    | --- | -------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
+    | excluded | x-coordinate of the torso                     | -Inf | Inf | rootx                            | slide | position (m)             |
+    | 0   | z-coordinate of the torso (height of Walker2d)     | -Inf | Inf | rootz                            | slide | position (m)             |
+    | 1   | angle of the torso                                 | -Inf | Inf | rooty                            | hinge | angle (rad)              |
+    | 2   | angle of the thigh joint                           | -Inf | Inf | thigh_joint                      | hinge | angle (rad)              |
+    | 3   | angle of the leg joint                             | -Inf | Inf | leg_joint                        | hinge | angle (rad)              |
+    | 4   | angle of the foot joint                            | -Inf | Inf | foot_joint                       | hinge | angle (rad)              |
+    | 5   | angle of the left thigh joint                      | -Inf | Inf | thigh_left_joint                 | hinge | angle (rad)              |
+    | 6   | angle of the left leg joint                        | -Inf | Inf | leg_left_joint                   | hinge | angle (rad)              |
+    | 7   | angle of the left foot joint                       | -Inf | Inf | foot_left_joint                  | hinge | angle (rad)              |
+    | 8   | velocity of the x-coordinate of the torso          | -Inf | Inf | rootx                            | slide | velocity (m/s)           |
+    | 9   | velocity of the z-coordinate (height) of the torso | -Inf | Inf | rootz                            | slide | velocity (m/s)           |
+    | 10  | angular velocity of the angle of the torso         | -Inf | Inf | rooty                            | hinge | angular velocity (rad/s) |
+    | 11  | angular velocity of the thigh hinge                | -Inf | Inf | thigh_joint                      | hinge | angular velocity (rad/s) |
+    | 12  | angular velocity of the leg hinge                  | -Inf | Inf | leg_joint                        | hinge | angular velocity (rad/s) |
+    | 13  | angular velocity of the foot hinge                 | -Inf | Inf | foot_joint                       | hinge | angular velocity (rad/s) |
+    | 14  | angular velocity of the thigh hinge                | -Inf | Inf | thigh_left_joint                 | hinge | angular velocity (rad/s) |
+    | 15  | angular velocity of the leg hinge                  | -Inf | Inf | leg_left_joint                   | hinge | angular velocity (rad/s) |
+    | 16  | angular velocity of the foot hinge                 | -Inf | Inf | foot_left_joint                  | hinge | angular velocity (rad/s) |
+
+    ## Rewards
+    The reward consists of three parts:
+    - *healthy_reward*: Every timestep that the walker is alive, it receives a fixed reward of value `healthy_reward`,
+    - *forward_reward*: A reward of walking forward which is measured as
+    *`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*.
+    *dt* is the time between actions and is dependent on the frame_skip parameter
+    (default is 4), where the frametime is 0.002 - making the default
+    *dt = 4 * 0.002 = 0.008*. This reward would be positive if the walker walks forward (positive x direction).
+    - *ctrl_cost*: A cost for penalising the walker if it
+    takes actions that are too large. It is measured as
+    *`ctrl_cost_weight` * sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is
+    a parameter set for the control and has a default value of 0.001
+
+    The total reward returned is ***reward*** *=* *healthy_reward bonus + forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
+
+    ## Starting State
+    All observations start in state
+    (0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+    with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity.
+
+    ## Episode End
+    The walker is said to be unhealthy if any of the following happens:
+
+    1. Any of the state space values is no longer finite
+    2. The height of the walker is ***not*** in the closed interval specified by `healthy_z_range`
+    3. The absolute value of the angle (`observation[1]` if `exclude_current_positions_from_observation=False`, else `observation[2]`) is ***not*** in the closed interval specified by `healthy_angle_range`
+
+    If `terminate_when_unhealthy=True` is passed during construction (which is the default),
+    the episode ends when any of the following happens:
+
+    1. Truncation: The episode duration reaches a 1000 timesteps
+    2. Termination: The walker is unhealthy
+
+    If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
+
+    ## Arguments
+
+    No additional arguments are currently supported in v2 and lower.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Walker2d-v4')
+    ```
+
+    v3 and beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
+
+    ```python
+    import gymnasium as gym
+    env = gym.make('Walker2d-v4', ctrl_cost_weight=0.1, ....)
+    ```
+
+    | Parameter                                    | Type      | Default          | Description                                                                                                                                                       |
+    | -------------------------------------------- | --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+    | `xml_file`                                   | **str**   | `"walker2d.xml"` | Path to a MuJoCo model                                                                                                                                            |
+    | `forward_reward_weight`                      | **float** | `1.0`            | Weight for _forward_reward_ term (see section on reward)                                                                                                          |
+    | `ctrl_cost_weight`                           | **float** | `1e-3`           | Weight for _ctrl_cost_ term (see section on reward)                                                                                                              |
+    | `healthy_reward`                             | **float** | `1.0`            | Constant reward given if the walker is "healthy" after timestep                                                                                                  |
+    | `terminate_when_unhealthy`                   | **bool**  | `True`           | If true, issue a done signal if the z-coordinate of the walker is no longer healthy                                                                               |
+    | `healthy_z_range`                            | **tuple** | `(0.8, 2)`       | The z-coordinate of the torso of the walker must be in this range to be considered healthy                                                                        |
+    | `healthy_angle_range`                        | **tuple** | `(-1, 1)`        | The angle must be in this range to be considered healthy                                                                                                          |
+    | `reset_noise_scale`                          | **float** | `5e-3`           | Scale of random perturbations of initial position and velocity (see section on Starting State)                                                                    |
+    | `exclude_current_positions_from_observation` | **bool**  | `True`           | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+
+    ## Version History
+
+    * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
+    * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+    * v2: All continuous control environments now use mujoco-py >= 1.50
+    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+    * v0: Initial versions release (1.0.0)
+    """
+
+    # Rendering metadata for this environment: the list of render modes it
+    # advertises and the frame rate it reports.
+    # NOTE(review): render_fps presumably has to agree with the simulation
+    # step length (see the frame-skip value handed to MujocoEnv.__init__) —
+    # confirm against the MujocoEnv base class before changing either.
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 125,
+    }
+
+    def __init__(
+        self,
+        forward_reward_weight=1.0,
+        ctrl_cost_weight=1e-3,
+        healthy_reward=1.0,
+        terminate_when_unhealthy=True,
+        healthy_z_range=(0.8, 2.0),
+        healthy_angle_range=(-1.0, 1.0),
+        reset_noise_scale=5e-3,
+        exclude_current_positions_from_observation=True,
+        **kwargs,
+    ):
+        """Store the reward/termination settings and initialise the simulation.
+
+        Args:
+            forward_reward_weight: Multiplier applied to the x-velocity term of
+                the reward in ``step``.
+            ctrl_cost_weight: Multiplier applied to the summed squared action
+                in ``control_cost``.
+            healthy_reward: Bonus handed out by the ``healthy_reward`` property.
+            terminate_when_unhealthy: Whether ``terminated`` reports ``True``
+                once the walker is no longer healthy.
+            healthy_z_range: Open ``(min, max)`` interval checked against
+                ``qpos[1]`` in ``is_healthy``.
+            healthy_angle_range: Open ``(min, max)`` interval checked against
+                ``qpos[2]`` in ``is_healthy``.
+            reset_noise_scale: Half-width of the uniform noise added to the
+                initial state in ``reset_model``.
+            exclude_current_positions_from_observation: When truthy, the first
+                ``qpos`` entry is dropped from observations (17 values instead
+                of 18).
+            **kwargs: Forwarded unchanged to both ``utils.EzPickle.__init__``
+                and ``MujocoEnv.__init__``.
+        """
+        # Record the constructor arguments, in signature order, for pickling.
+        pickled_args = (
+            forward_reward_weight,
+            ctrl_cost_weight,
+            healthy_reward,
+            terminate_when_unhealthy,
+            healthy_z_range,
+            healthy_angle_range,
+            reset_noise_scale,
+            exclude_current_positions_from_observation,
+        )
+        utils.EzPickle.__init__(self, *pickled_args, **kwargs)
+
+        # Reward shaping.
+        self._forward_reward_weight = forward_reward_weight
+        self._ctrl_cost_weight = ctrl_cost_weight
+        self._healthy_reward = healthy_reward
+
+        # Health / termination criteria.
+        self._terminate_when_unhealthy = terminate_when_unhealthy
+        self._healthy_z_range = healthy_z_range
+        self._healthy_angle_range = healthy_angle_range
+
+        # Reset noise and observation layout.
+        self._reset_noise_scale = reset_noise_scale
+        self._exclude_current_positions_from_observation = (
+            exclude_current_positions_from_observation
+        )
+
+        # One entry fewer when the leading position coordinate is hidden.
+        obs_dim = 17 if exclude_current_positions_from_observation else 18
+        observation_space = Box(
+            low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float64
+        )
+
+        MujocoEnv.__init__(
+            self,
+            "walker2d.xml",
+            4,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+
+    @property
+    def healthy_reward(self):
+        """Return the health bonus earned on the current state.
+
+        The configured bonus is paid when the walker is healthy, and also
+        whenever ``terminate_when_unhealthy`` is set; otherwise it is zero.
+        """
+        earns_bonus = self.is_healthy or self._terminate_when_unhealthy
+        return float(earns_bonus) * self._healthy_reward
+
+    def control_cost(self, action):
+        """Return the control penalty: the weighted sum of squared action entries."""
+        squared_effort = np.square(action)
+        return self._ctrl_cost_weight * np.sum(squared_effort)
+
+    @property
+    def is_healthy(self):
+        """Report whether the walker is inside both healthy ranges.
+
+        ``qpos[1]`` (treated as the height) must lie strictly inside
+        ``healthy_z_range`` and ``qpos[2]`` (treated as the angle) strictly
+        inside ``healthy_angle_range``.
+        """
+        height, tilt = self.data.qpos[1:3]
+
+        z_floor, z_ceiling = self._healthy_z_range
+        tilt_floor, tilt_ceiling = self._healthy_angle_range
+
+        height_in_band = z_floor < height < z_ceiling
+        tilt_in_band = tilt_floor < tilt < tilt_ceiling
+
+        return height_in_band and tilt_in_band
+
+    @property
+    def terminated(self):
+        """Report whether the episode should end because the walker is unhealthy.
+
+        Always ``False`` when ``terminate_when_unhealthy`` is disabled.
+        """
+        if not self._terminate_when_unhealthy:
+            return False
+        return not self.is_healthy
+
+    def _get_obs(self):
+        """Build the flat observation: positions followed by clipped velocities.
+
+        Velocities are clipped to ``[-10, 10]``. When
+        ``exclude_current_positions_from_observation`` is set, the first
+        position entry is left out.
+        """
+        qpos = self.data.qpos.flat.copy()
+        qvel = np.clip(self.data.qvel.flat.copy(), -10, 10)
+
+        # Skip the leading position coordinate when it is hidden from the agent.
+        first_kept = 1 if self._exclude_current_positions_from_observation else 0
+
+        return np.concatenate((qpos[first_kept:], qvel)).ravel()
+
+    def step(self, action):
+        """Advance the simulation by one environment step.
+
+        Args:
+            action: Control input passed to ``do_simulation`` and penalised by
+                ``control_cost``.
+
+        Returns:
+            A tuple ``(observation, reward, terminated, truncated, info)``.
+            ``reward`` is forward progress plus the health bonus minus the
+            control cost, ``truncated`` is always ``False`` here, and ``info``
+            carries ``"x_position"`` and ``"x_velocity"``.
+        """
+        # Forward speed is the change in qpos[0] across the step, divided by dt.
+        x_start = self.data.qpos[0]
+        self.do_simulation(action, self.frame_skip)
+        x_end = self.data.qpos[0]
+        forward_speed = (x_end - x_start) / self.dt
+
+        effort_penalty = self.control_cost(action)
+        progress_bonus = self._forward_reward_weight * forward_speed
+        alive_bonus = self.healthy_reward
+
+        obs = self._get_obs()
+        reward = (progress_bonus + alive_bonus) - effort_penalty
+        done = self.terminated
+        info = dict(x_position=x_end, x_velocity=forward_speed)
+
+        if self.render_mode == "human":
+            self.render()
+
+        return obs, reward, done, False, info
+
+    def reset_model(self):
+        """Reset to the initial state plus uniform noise and return the observation.
+
+        Positions and velocities are each perturbed by noise drawn uniformly
+        from ``[-reset_noise_scale, reset_noise_scale]``.
+        """
+        spread = self._reset_noise_scale
+
+        def _jitter(count):
+            # One uniform sample per coordinate, symmetric around zero.
+            return self.np_random.uniform(low=-spread, high=spread, size=count)
+
+        # Positions are sampled before velocities to keep the RNG draw order.
+        start_qpos = self.init_qpos + _jitter(self.model.nq)
+        start_qvel = self.init_qvel + _jitter(self.model.nv)
+
+        self.set_state(start_qpos, start_qvel)
+
+        return self._get_obs()