I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/__init__.py
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/__init__.py
@@ -0,0 +1,3 @@
+from gymnasium.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
+from gymnasium.envs.box2d.car_racing import CarRacing
+from gymnasium.envs.box2d.lunar_lander import LunarLander, LunarLanderContinuous
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/__init__.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/__init__.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/bipedal_walker.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/bipedal_walker.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/car_dynamics.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/car_dynamics.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/car_racing.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/car_racing.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/lunar_lander.cpython-312.pyc
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/__pycache__/lunar_lander.cpython-312.pyc
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/bipedal_walker.py
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/bipedal_walker.py
@ -0,0 +1,861 @@
+__credits__ = ["Andrea PIERRÉ"]
+
+import math
+from typing import TYPE_CHECKING, List, Optional
+
+import numpy as np
+
+import gymnasium as gym
+from gymnasium import error, spaces
+from gymnasium.error import DependencyNotInstalled
+from gymnasium.utils import EzPickle
+
+
+try:
+    import Box2D
+    from Box2D.b2 import (
+        circleShape,
+        contactListener,
+        edgeShape,
+        fixtureDef,
+        polygonShape,
+        revoluteJointDef,
+    )
+except ImportError as e:
+    raise DependencyNotInstalled(
+        "Box2D is not installed, run `pip install gymnasium[box2d]`"
+    ) from e
+
+
+if TYPE_CHECKING:
+    import pygame
+
+# Physics steps per second (world.Step uses 1 / FPS); also the render frame rate.
+FPS = 50
+SCALE = 30.0  # affects how fast-paced the game is, forces should be adjusted as well
+
+# Motor limits for the four leg joints. The hip/knee speeds are also used to
+# normalize the joint speeds reported in the observation.
+MOTORS_TORQUE = 80
+SPEED_HIP = 4
+SPEED_KNEE = 6
+# Length of each lidar ray cast from the hull position.
+LIDAR_RANGE = 160 / SCALE
+
+# Bound of the random horizontal force applied to the hull on reset.
+INITIAL_RANDOM = 5
+
+# Hull outline; each vertex is divided by SCALE when HULL_FD is built below.
+HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)]
+# Vertical offset of the hip joint anchor on the hull, and leg segment size.
+LEG_DOWN = -8 / SCALE
+LEG_W, LEG_H = 8 / SCALE, 34 / SCALE
+
+# Size of the pygame window, in pixels.
+VIEWPORT_W = 600
+VIEWPORT_H = 400
+
+TERRAIN_STEP = 14 / SCALE  # horizontal spacing between terrain samples
+TERRAIN_LENGTH = 200  # in steps
+TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4  # baseline height the grass is pulled toward
+TERRAIN_GRASS = 10  # how long are grass spots, in steps
+TERRAIN_STARTPAD = 20  # in steps
+FRICTION = 2.5  # friction of the terrain fixtures
+
+# Fixture template for the hull body.
+HULL_FD = fixtureDef(
+    shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]),
+    density=5.0,
+    friction=0.1,
+    categoryBits=0x0020,
+    maskBits=0x001,  # collide only with ground
+    restitution=0.0,
+)  # 0.99 bouncy
+
+# Fixture template for the two upper leg segments.
+LEG_FD = fixtureDef(
+    shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)),
+    density=1.0,
+    restitution=0.0,
+    categoryBits=0x0020,
+    maskBits=0x001,
+)
+
+# Fixture template for the two lower leg segments (80% of the upper leg width).
+LOWER_FD = fixtureDef(
+    shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)),
+    density=1.0,
+    restitution=0.0,
+    categoryBits=0x0020,
+    maskBits=0x001,
+)
+
+
+class ContactDetector(contactListener):
+    def __init__(self, env):
+        contactListener.__init__(self)
+        self.env = env
+
+    def BeginContact(self, contact):
+        if (
+            self.env.hull == contact.fixtureA.body
+            or self.env.hull == contact.fixtureB.body
+        ):
+            self.env.game_over = True
+        for leg in [self.env.legs[1], self.env.legs[3]]:
+            if leg in [contact.fixtureA.body, contact.fixtureB.body]:
+                leg.ground_contact = True
+
+    def EndContact(self, contact):
+        for leg in [self.env.legs[1], self.env.legs[3]]:
+            if leg in [contact.fixtureA.body, contact.fixtureB.body]:
+                leg.ground_contact = False
+
+
+class BipedalWalker(gym.Env, EzPickle):
+    """
+    ## Description
+    This is a simple 4-joint walker robot environment.
+    There are two versions:
+    - Normal, with slightly uneven terrain.
+    - Hardcore, with ladders, stumps, pitfalls.
+
+    To solve the normal version, you need to get 300 points in 1600 time steps.
+    To solve the hardcore version, you need 300 points in 2000 time steps.
+
+    A heuristic is provided for testing. It's also useful to get demonstrations
+    to learn from. To run the heuristic:
+    ```
+    python gymnasium/envs/box2d/bipedal_walker.py
+    ```
+
+    ## Action Space
+    Actions are motor speed values in the [-1, 1] range for each of the
+    4 joints at both hips and knees.
+
+    ## Observation Space
+    State consists of hull angle, hull angular velocity, horizontal speed,
+    vertical speed, position of joints and joints angular speed, legs contact
+    with ground, and 10 lidar rangefinder measurements. There are no coordinates
+    in the state vector.
+
+    ## Rewards
+    Reward is given for moving forward, totaling 300+ points up to the far end.
+    If the robot falls, it gets -100. Applying motor torque costs a small
+    amount of points. A more optimal agent will get a better score.
+
+    ## Starting State
+    The walker starts standing at the left end of the terrain with the hull
+    horizontal, and both legs in the same position with a slight knee angle.
+
+    ## Episode Termination
+    The episode will terminate if the hull gets in contact with the ground or
+    if the walker exceeds the right end of the terrain length.
+
+    ## Arguments
+    To use the _hardcore_ environment, you need to specify the
+    `hardcore=True` argument like below:
+    ```python
+    import gymnasium as gym
+    env = gym.make("BipedalWalker-v3", hardcore=True)
+    ```
+
+    ## Version History
+    - v3: Returns the closest lidar trace instead of furthest;
+        faster video recording
+    - v2: Count energy spent
+    - v1: Legs now report contact with ground; motors have higher torque and
+        speed; ground has higher friction; lidar rendered less nervously.
+    - v0: Initial version
+
+
+    <!-- ## References -->
+
+    ## Credits
+    Created by Oleg Klimov
+
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array"],
+        "render_fps": FPS,
+    }
+
+    def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False):
+        EzPickle.__init__(self, render_mode, hardcore)
+        self.isopen = True
+
+        self.world = Box2D.b2World()
+        self.terrain: List[Box2D.b2Body] = []
+        self.hull: Optional[Box2D.b2Body] = None
+
+        self.prev_shaping = None
+
+        self.hardcore = hardcore
+
+        self.fd_polygon = fixtureDef(
+            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
+            friction=FRICTION,
+        )
+
+        self.fd_edge = fixtureDef(
+            shape=edgeShape(vertices=[(0, 0), (1, 1)]),
+            friction=FRICTION,
+            categoryBits=0x0001,
+        )
+
+        # we use 5.0 to represent the joints moving at maximum
+        # 5 x the rated speed due to impulses from ground contact etc.
+        low = np.array(
+            [
+                -math.pi,
+                -5.0,
+                -5.0,
+                -5.0,
+                -math.pi,
+                -5.0,
+                -math.pi,
+                -5.0,
+                -0.0,
+                -math.pi,
+                -5.0,
+                -math.pi,
+                -5.0,
+                -0.0,
+            ]
+            + [-1.0] * 10
+        ).astype(np.float32)
+        high = np.array(
+            [
+                math.pi,
+                5.0,
+                5.0,
+                5.0,
+                math.pi,
+                5.0,
+                math.pi,
+                5.0,
+                5.0,
+                math.pi,
+                5.0,
+                math.pi,
+                5.0,
+                5.0,
+            ]
+            + [1.0] * 10
+        ).astype(np.float32)
+        self.action_space = spaces.Box(
+            np.array([-1, -1, -1, -1]).astype(np.float32),
+            np.array([1, 1, 1, 1]).astype(np.float32),
+        )
+        self.observation_space = spaces.Box(low, high)
+
+        # state = [
+        #     self.hull.angle,  # Normal angles up to 0.5 here, but sure more is possible.
+        #     2.0 * self.hull.angularVelocity / FPS,
+        #     0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS,  # Normalized to get -1..1 range
+        #     0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
+        #     self.joints[
+        #         0
+        #     ].angle,  # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
+        #     self.joints[0].speed / SPEED_HIP,
+        #     self.joints[1].angle + 1.0,
+        #     self.joints[1].speed / SPEED_KNEE,
+        #     1.0 if self.legs[1].ground_contact else 0.0,
+        #     self.joints[2].angle,
+        #     self.joints[2].speed / SPEED_HIP,
+        #     self.joints[3].angle + 1.0,
+        #     self.joints[3].speed / SPEED_KNEE,
+        #     1.0 if self.legs[3].ground_contact else 0.0,
+        # ]
+        # state += [l.fraction for l in self.lidar]
+
+        self.render_mode = render_mode
+        self.screen: Optional[pygame.Surface] = None
+        self.clock = None
+
+    def _destroy(self):
+        if not self.terrain:
+            return
+        self.world.contactListener = None
+        for t in self.terrain:
+            self.world.DestroyBody(t)
+        self.terrain = []
+        self.world.DestroyBody(self.hull)
+        self.hull = None
+        for leg in self.legs:
+            self.world.DestroyBody(leg)
+        self.legs = []
+        self.joints = []
+
+    def _generate_terrain(self, hardcore):
+        """Build the terrain height profile and its static Box2D bodies.
+
+        Populates ``self.terrain`` (the created bodies), ``self.terrain_x`` and
+        ``self.terrain_y`` (one sample per terrain step) and
+        ``self.terrain_poly`` (filled polygons drawn by ``render``).
+
+        Args:
+            hardcore: When true, each grass stretch is followed by a randomly
+                drawn non-grass state; otherwise the state always stays grass.
+        """
+        GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
+        state = GRASS
+        velocity = 0.0
+        y = TERRAIN_HEIGHT
+        # Steps left before the current state ends; starts with the flat pad.
+        counter = TERRAIN_STARTPAD
+        # True only on the first step after a state change.
+        oneshot = False
+        self.terrain = []
+        self.terrain_x = []
+        self.terrain_y = []
+
+        stair_steps, stair_width, stair_height = 0, 0, 0
+        original_y = 0
+        for i in range(TERRAIN_LENGTH):
+            x = i * TERRAIN_STEP
+            self.terrain_x.append(x)
+
+            if state == GRASS and not oneshot:
+                # Damped pull back toward TERRAIN_HEIGHT, with random noise
+                # added once past the start pad.
+                velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y)
+                if i > TERRAIN_STARTPAD:
+                    velocity += self.np_random.uniform(-1, 1) / SCALE  # 1
+                y += velocity
+
+            elif state == PIT and oneshot:
+                # First step of a pit: create the two wall blocks, `counter`
+                # steps apart, each one step wide and four steps deep.
+                counter = self.np_random.integers(3, 5)
+                poly = [
+                    (x, y),
+                    (x + TERRAIN_STEP, y),
+                    (x + TERRAIN_STEP, y - 4 * TERRAIN_STEP),
+                    (x, y - 4 * TERRAIN_STEP),
+                ]
+                self.fd_polygon.shape.vertices = poly
+                t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
+                t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
+                self.terrain.append(t)
+
+                self.fd_polygon.shape.vertices = [
+                    (p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
+                ]
+                t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
+                t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
+                self.terrain.append(t)
+                # The pit state lasts two steps longer than the wall spacing.
+                counter += 2
+                original_y = y
+
+            elif state == PIT and not oneshot:
+                # Inside the pit the surface is lowered by four steps; on the
+                # last pit step (counter == 1) it is back at the original height.
+                y = original_y
+                if counter > 1:
+                    y -= 4 * TERRAIN_STEP
+
+            elif state == STUMP and oneshot:
+                # Square block of side counter * TERRAIN_STEP resting on the
+                # current surface height.
+                counter = self.np_random.integers(1, 3)
+                poly = [
+                    (x, y),
+                    (x + counter * TERRAIN_STEP, y),
+                    (x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP),
+                    (x, y + counter * TERRAIN_STEP),
+                ]
+                self.fd_polygon.shape.vertices = poly
+                t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
+                t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
+                self.terrain.append(t)
+
+            elif state == STAIRS and oneshot:
+                # Lay out all stair blocks at once; stair_height is +1 for
+                # ascending stairs and -1 for descending ones.
+                stair_height = +1 if self.np_random.random() > 0.5 else -1
+                # NOTE(review): if np_random is a numpy Generator, the upper
+                # bound of integers() is exclusive, so this would always be 4
+                # -- confirm whether that is intended.
+                stair_width = self.np_random.integers(4, 5)
+                stair_steps = self.np_random.integers(3, 5)
+                original_y = y
+                for s in range(stair_steps):
+                    poly = [
+                        (
+                            x + (s * stair_width) * TERRAIN_STEP,
+                            y + (s * stair_height) * TERRAIN_STEP,
+                        ),
+                        (
+                            x + ((1 + s) * stair_width) * TERRAIN_STEP,
+                            y + (s * stair_height) * TERRAIN_STEP,
+                        ),
+                        (
+                            x + ((1 + s) * stair_width) * TERRAIN_STEP,
+                            y + (-1 + s * stair_height) * TERRAIN_STEP,
+                        ),
+                        (
+                            x + (s * stair_width) * TERRAIN_STEP,
+                            y + (-1 + s * stair_height) * TERRAIN_STEP,
+                        ),
+                    ]
+                    self.fd_polygon.shape.vertices = poly
+                    t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
+                    t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
+                    self.terrain.append(t)
+                counter = stair_steps * stair_width
+
+            elif state == STAIRS and not oneshot:
+                # Make the sampled surface height follow the stair profile.
+                s = stair_steps * stair_width - counter - stair_height
+                n = s / stair_width
+                y = original_y + (n * stair_height) * TERRAIN_STEP
+
+            oneshot = False
+            self.terrain_y.append(y)
+            counter -= 1
+            if counter == 0:
+                # Current stretch is used up: pick its successor and a new
+                # length for it (obstacle branches overwrite the length).
+                counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS)
+                if state == GRASS and hardcore:
+                    state = self.np_random.integers(1, _STATES_)
+                    oneshot = True
+                else:
+                    state = GRASS
+                    oneshot = True
+
+        # Connect consecutive samples with edge bodies and record the filled
+        # polygons that render() draws underneath them.
+        self.terrain_poly = []
+        for i in range(TERRAIN_LENGTH - 1):
+            poly = [
+                (self.terrain_x[i], self.terrain_y[i]),
+                (self.terrain_x[i + 1], self.terrain_y[i + 1]),
+            ]
+            self.fd_edge.shape.vertices = poly
+            t = self.world.CreateStaticBody(fixtures=self.fd_edge)
+            color = (76, 255 if i % 2 == 0 else 204, 76)
+            t.color1 = color
+            t.color2 = color
+            self.terrain.append(t)
+            color = (102, 153, 76)
+            # Extend the segment down to y = 0 to close the fill polygon.
+            poly += [(poly[1][0], 0), (poly[0][0], 0)]
+            self.terrain_poly.append((poly, color))
+        self.terrain.reverse()
+
+    def _generate_clouds(self):
+        # Sorry for the clouds, couldn't resist
+        self.cloud_poly = []
+        for i in range(TERRAIN_LENGTH // 20):
+            x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
+            y = VIEWPORT_H / SCALE * 3 / 4
+            poly = [
+                (
+                    x
+                    + 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
+                    + self.np_random.uniform(0, 5 * TERRAIN_STEP),
+                    y
+                    + 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
+                    + self.np_random.uniform(0, 5 * TERRAIN_STEP),
+                )
+                for a in range(5)
+            ]
+            x1 = min(p[0] for p in poly)
+            x2 = max(p[0] for p in poly)
+            self.cloud_poly.append((poly, x1, x2))
+
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ):
+        """Rebuild the world and return the first observation.
+
+        Destroys the bodies of the previous episode, regenerates terrain and
+        clouds, creates the hull with its two two-segment legs, and takes one
+        zero-action step to obtain the initial observation.
+
+        Args:
+            seed: Forwarded to ``super().reset``.
+            options: Accepted but not read by this method.
+
+        Returns:
+            A ``(observation, info)`` pair; ``info`` is an empty dict.
+        """
+        super().reset(seed=seed)
+        self._destroy()
+        # The listener is also stored as an attribute on the world; judging by
+        # the name this works around a binding bug -- presumably it keeps the
+        # Python object referenced (TODO confirm).
+        self.world.contactListener_bug_workaround = ContactDetector(self)
+        self.world.contactListener = self.world.contactListener_bug_workaround
+        self.game_over = False
+        self.prev_shaping = None
+        self.scroll = 0.0
+        self.lidar_render = 0
+
+        self._generate_terrain(self.hardcore)
+        self._generate_clouds()
+
+        # The hull starts in the middle of the start pad, two leg lengths
+        # above the baseline terrain height.
+        init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
+        init_y = TERRAIN_HEIGHT + 2 * LEG_H
+        self.hull = self.world.CreateDynamicBody(
+            position=(init_x, init_y), fixtures=HULL_FD
+        )
+        self.hull.color1 = (127, 51, 229)
+        self.hull.color2 = (76, 76, 127)
+        # Random horizontal push so that episodes do not start identically.
+        self.hull.ApplyForceToCenter(
+            (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
+        )
+
+        # legs and joints end up ordered [upper, lower, upper, lower] and
+        # [hip, knee, hip, knee]; step() and ContactDetector rely on that.
+        self.legs: List[Box2D.b2Body] = []
+        self.joints: List[Box2D.b2RevoluteJoint] = []
+        for i in [-1, +1]:
+            # Upper leg segment, attached to the hull by the hip joint.
+            leg = self.world.CreateDynamicBody(
+                position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
+                angle=(i * 0.05),
+                fixtures=LEG_FD,
+            )
+            leg.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
+            leg.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
+            rjd = revoluteJointDef(
+                bodyA=self.hull,
+                bodyB=leg,
+                localAnchorA=(0, LEG_DOWN),
+                localAnchorB=(0, LEG_H / 2),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=MOTORS_TORQUE,
+                motorSpeed=i,
+                lowerAngle=-0.8,
+                upperAngle=1.1,
+            )
+            self.legs.append(leg)
+            self.joints.append(self.world.CreateJoint(rjd))
+
+            # Lower leg segment, attached to the upper leg by the knee joint.
+            lower = self.world.CreateDynamicBody(
+                position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
+                angle=(i * 0.05),
+                fixtures=LOWER_FD,
+            )
+            lower.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
+            lower.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
+            rjd = revoluteJointDef(
+                bodyA=leg,
+                bodyB=lower,
+                localAnchorA=(0, -LEG_H / 2),
+                localAnchorB=(0, LEG_H / 2),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=MOTORS_TORQUE,
+                motorSpeed=1,
+                lowerAngle=-1.6,
+                upperAngle=-0.1,
+            )
+            # Only the lower segments carry the ground_contact flag.
+            lower.ground_contact = False
+            self.legs.append(lower)
+            self.joints.append(self.world.CreateJoint(rjd))
+
+        # Bodies drawn by render(), in this order.
+        self.drawlist = self.terrain + self.legs + [self.hull]
+
+        class LidarCallback(Box2D.b2.rayCastCallback):
+            def ReportFixture(self, fixture, point, normal, fraction):
+                # Fixtures without category bit 0 set are skipped; the edge
+                # terrain fixtures are the ones created with that bit.
+                if (fixture.filterData.categoryBits & 1) == 0:
+                    return -1
+                # Record the hit and return its fraction.
+                self.p2 = point
+                self.fraction = fraction
+                return fraction
+
+        self.lidar = [LidarCallback() for _ in range(10)]
+        if self.render_mode == "human":
+            self.render()
+        # The initial observation comes from one simulation step with a
+        # zero action.
+        return self.step(np.array([0, 0, 0, 0]))[0], {}
+
+    def step(self, action: np.ndarray):
+        assert self.hull is not None
+
+        # self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
+        control_speed = False  # Should be easier as well
+        if control_speed:
+            self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
+            self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
+            self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
+            self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
+        else:
+            self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
+            self.joints[0].maxMotorTorque = float(
+                MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
+            )
+            self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
+            self.joints[1].maxMotorTorque = float(
+                MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
+            )
+            self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
+            self.joints[2].maxMotorTorque = float(
+                MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
+            )
+            self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
+            self.joints[3].maxMotorTorque = float(
+                MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
+            )
+
+        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
+
+        pos = self.hull.position
+        vel = self.hull.linearVelocity
+
+        for i in range(10):
+            self.lidar[i].fraction = 1.0
+            self.lidar[i].p1 = pos
+            self.lidar[i].p2 = (
+                pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE,
+                pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE,
+            )
+            self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
+
+        state = [
+            self.hull.angle,  # Normal angles up to 0.5 here, but sure more is possible.
+            2.0 * self.hull.angularVelocity / FPS,
+            0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS,  # Normalized to get -1..1 range
+            0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
+            self.joints[0].angle,
+            # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
+            self.joints[0].speed / SPEED_HIP,
+            self.joints[1].angle + 1.0,
+            self.joints[1].speed / SPEED_KNEE,
+            1.0 if self.legs[1].ground_contact else 0.0,
+            self.joints[2].angle,
+            self.joints[2].speed / SPEED_HIP,
+            self.joints[3].angle + 1.0,
+            self.joints[3].speed / SPEED_KNEE,
+            1.0 if self.legs[3].ground_contact else 0.0,
+        ]
+        state += [l.fraction for l in self.lidar]
+        assert len(state) == 24
+
+        self.scroll = pos.x - VIEWPORT_W / SCALE / 5
+
+        shaping = (
+            130 * pos[0] / SCALE
+        )  # moving forward is a way to receive reward (normalized to get 300 on completion)
+        shaping -= 5.0 * abs(
+            state[0]
+        )  # keep head straight, other than that and falling, any behavior is unpunished
+
+        reward = 0
+        if self.prev_shaping is not None:
+            reward = shaping - self.prev_shaping
+        self.prev_shaping = shaping
+
+        for a in action:
+            reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
+            # normalized to about -50.0 using heuristic, more optimal agent should spend less
+
+        terminated = False
+        if self.game_over or pos[0] < 0:
+            reward = -100
+            terminated = True
+        if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
+            terminated = True
+
+        if self.render_mode == "human":
+            self.render()
+        return np.array(state, dtype=np.float32), reward, terminated, False, {}
+
+    def render(self):
+        if self.render_mode is None:
+            assert self.spec is not None
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
+        try:
+            import pygame
+            from pygame import gfxdraw
+        except ImportError as e:
+            raise DependencyNotInstalled(
+                "pygame is not installed, run `pip install gymnasium[box2d]`"
+            ) from e
+
+        if self.screen is None and self.render_mode == "human":
+            pygame.init()
+            pygame.display.init()
+            self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
+        if self.clock is None:
+            self.clock = pygame.time.Clock()
+
+        self.surf = pygame.Surface(
+            (VIEWPORT_W + max(0.0, self.scroll) * SCALE, VIEWPORT_H)
+        )
+
+        pygame.transform.scale(self.surf, (SCALE, SCALE))
+
+        pygame.draw.polygon(
+            self.surf,
+            color=(215, 215, 255),
+            points=[
+                (self.scroll * SCALE, 0),
+                (self.scroll * SCALE + VIEWPORT_W, 0),
+                (self.scroll * SCALE + VIEWPORT_W, VIEWPORT_H),
+                (self.scroll * SCALE, VIEWPORT_H),
+            ],
+        )
+
+        for poly, x1, x2 in self.cloud_poly:
+            if x2 < self.scroll / 2:
+                continue
+            if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
+                continue
+            pygame.draw.polygon(
+                self.surf,
+                color=(255, 255, 255),
+                points=[
+                    (p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly
+                ],
+            )
+            gfxdraw.aapolygon(
+                self.surf,
+                [(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly],
+                (255, 255, 255),
+            )
+        for poly, color in self.terrain_poly:
+            if poly[1][0] < self.scroll:
+                continue
+            if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
+                continue
+            scaled_poly = []
+            for coord in poly:
+                scaled_poly.append([coord[0] * SCALE, coord[1] * SCALE])
+            pygame.draw.polygon(self.surf, color=color, points=scaled_poly)
+            gfxdraw.aapolygon(self.surf, scaled_poly, color)
+
+        self.lidar_render = (self.lidar_render + 1) % 100
+        i = self.lidar_render
+        if i < 2 * len(self.lidar):
+            single_lidar = (
+                self.lidar[i]
+                if i < len(self.lidar)
+                else self.lidar[len(self.lidar) - i - 1]
+            )
+            if hasattr(single_lidar, "p1") and hasattr(single_lidar, "p2"):
+                pygame.draw.line(
+                    self.surf,
+                    color=(255, 0, 0),
+                    start_pos=(single_lidar.p1[0] * SCALE, single_lidar.p1[1] * SCALE),
+                    end_pos=(single_lidar.p2[0] * SCALE, single_lidar.p2[1] * SCALE),
+                    width=1,
+                )
+
+        for obj in self.drawlist:
+            for f in obj.fixtures:
+                trans = f.body.transform
+                if type(f.shape) is circleShape:
+                    pygame.draw.circle(
+                        self.surf,
+                        color=obj.color1,
+                        center=trans * f.shape.pos * SCALE,
+                        radius=f.shape.radius * SCALE,
+                    )
+                    pygame.draw.circle(
+                        self.surf,
+                        color=obj.color2,
+                        center=trans * f.shape.pos * SCALE,
+                        radius=f.shape.radius * SCALE,
+                    )
+                else:
+                    path = [trans * v * SCALE for v in f.shape.vertices]
+                    if len(path) > 2:
+                        pygame.draw.polygon(self.surf, color=obj.color1, points=path)
+                        gfxdraw.aapolygon(self.surf, path, obj.color1)
+                        path.append(path[0])
+                        pygame.draw.polygon(
+                            self.surf, color=obj.color2, points=path, width=1
+                        )
+                        gfxdraw.aapolygon(self.surf, path, obj.color2)
+                    else:
+                        pygame.draw.aaline(
+                            self.surf,
+                            start_pos=path[0],
+                            end_pos=path[1],
+                            color=obj.color1,
+                        )
+
+        flagy1 = TERRAIN_HEIGHT * SCALE
+        flagy2 = flagy1 + 50
+        x = TERRAIN_STEP * 3 * SCALE
+        pygame.draw.aaline(
+            self.surf, color=(0, 0, 0), start_pos=(x, flagy1), end_pos=(x, flagy2)
+        )
+        f = [
+            (x, flagy2),
+            (x, flagy2 - 10),
+            (x + 25, flagy2 - 5),
+        ]
+        pygame.draw.polygon(self.surf, color=(230, 51, 0), points=f)
+        pygame.draw.lines(
+            self.surf, color=(0, 0, 0), points=f + [f[0]], width=1, closed=False
+        )
+
+        self.surf = pygame.transform.flip(self.surf, False, True)
+
+        if self.render_mode == "human":
+            assert self.screen is not None
+            self.screen.blit(self.surf, (-self.scroll * SCALE, 0))
+            pygame.event.pump()
+            self.clock.tick(self.metadata["render_fps"])
+            pygame.display.flip()
+        elif self.render_mode == "rgb_array":
+            return np.transpose(
+                np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2)
+            )[:, -VIEWPORT_W:]
+
+    def close(self):
+        if self.screen is not None:
+            import pygame
+
+            pygame.display.quit()
+            pygame.quit()
+            self.isopen = False
+
+
+class BipedalWalkerHardcore:
+    def __init__(self):
+        raise error.Error(
+            "Error initializing BipedalWalkerHardcore Environment.\n"
+            "Currently, we do not support initializing this mode of environment by calling the class directly.\n"
+            "To use this environment, instead create it by specifying the hardcore keyword in gym.make, i.e.\n"
+            'gym.make("BipedalWalker-v3", hardcore=True)'
+        )
+
+
+if __name__ == "__main__":
+    # Heurisic: suboptimal, have no notion of balance.
+    env = BipedalWalker()
+    env.reset()
+    steps = 0
+    total_reward = 0
+    a = np.array([0.0, 0.0, 0.0, 0.0])
+    STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3
+    SPEED = 0.29  # Will fall forward on higher speed
+    state = STAY_ON_ONE_LEG
+    moving_leg = 0
+    supporting_leg = 1 - moving_leg
+    SUPPORT_KNEE_ANGLE = +0.1
+    supporting_knee_angle = SUPPORT_KNEE_ANGLE
+    while True:
+        s, r, terminated, truncated, info = env.step(a)
+        total_reward += r
+        if steps % 20 == 0 or terminated or truncated:
+            print("\naction " + str([f"{x:+0.2f}" for x in a]))
+            print(f"step {steps} total_reward {total_reward:+0.2f}")
+            print("hull " + str([f"{x:+0.2f}" for x in s[0:4]]))
+            print("leg0 " + str([f"{x:+0.2f}" for x in s[4:9]]))
+            print("leg1 " + str([f"{x:+0.2f}" for x in s[9:14]]))
+        steps += 1
+
+        contact0 = s[8]
+        contact1 = s[13]
+        moving_s_base = 4 + 5 * moving_leg
+        supporting_s_base = 4 + 5 * supporting_leg
+
+        hip_targ = [None, None]  # -0.8 .. +1.1
+        knee_targ = [None, None]  # -0.6 .. +0.9
+        hip_todo = [0.0, 0.0]
+        knee_todo = [0.0, 0.0]
+
+        if state == STAY_ON_ONE_LEG:
+            hip_targ[moving_leg] = 1.1
+            knee_targ[moving_leg] = -0.6
+            supporting_knee_angle += 0.03
+            if s[2] > SPEED:
+                supporting_knee_angle += 0.03
+            supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE)
+            knee_targ[supporting_leg] = supporting_knee_angle
+            if s[supporting_s_base + 0] < 0.10:  # supporting leg is behind
+                state = PUT_OTHER_DOWN
+        if state == PUT_OTHER_DOWN:
+            hip_targ[moving_leg] = +0.1
+            knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
+            knee_targ[supporting_leg] = supporting_knee_angle
+            if s[moving_s_base + 4]:
+                state = PUSH_OFF
+                supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE)
+        if state == PUSH_OFF:
+            knee_targ[moving_leg] = supporting_knee_angle
+            knee_targ[supporting_leg] = +1.0
+            if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED:
+                state = STAY_ON_ONE_LEG
+                moving_leg = 1 - moving_leg
+                supporting_leg = 1 - moving_leg
+
+        if hip_targ[0]:
+            hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5]
+        if hip_targ[1]:
+            hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10]
+        if knee_targ[0]:
+            knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7]
+        if knee_targ[1]:
+            knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12]
+
+        hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1]  # PID to keep head strait
+        hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1]
+        knee_todo[0] -= 15.0 * s[3]  # vertical speed, to damp oscillations
+        knee_todo[1] -= 15.0 * s[3]
+
+        a[0] = hip_todo[0]
+        a[1] = knee_todo[0]
+        a[2] = hip_todo[1]
+        a[3] = knee_todo[1]
+        a = np.clip(0.5 * a, -1.0, 1.0)
+
+        if terminated or truncated:
+            break
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/car_dynamics.py
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/car_dynamics.py
@ -0,0 +1,356 @@
+"""
+Top-down car dynamics simulation.
+
+Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
+This simulation is a bit more detailed, with wheels rotation.
+
+Created by Oleg Klimov
+"""
+
+import math
+
+import Box2D
+import numpy as np
+
+from gymnasium.error import DependencyNotInstalled
+
+
+try:
+    from Box2D.b2 import fixtureDef, polygonShape, revoluteJointDef
+except ImportError as e:
+    raise DependencyNotInstalled(
+        "Box2D is not installed, run `pip install gymnasium[box2d]`"
+    ) from e
+
+
+# Global scale factor: every hull/wheel coordinate below is multiplied by SIZE
+# when the Box2D shapes are built.
+SIZE = 0.02
+# Multiplier for wheel spin-up under gas in Car.step (also used to accumulate
+# Car.fuel_spent).
+ENGINE_POWER = 100000000 * SIZE * SIZE
+# Divisor that converts engine power / tyre force into a change of wheel omega.
+WHEEL_MOMENT_OF_INERTIA = 4000 * SIZE * SIZE
+FRICTION_LIMIT = (
+    1000000 * SIZE * SIZE
+)  # friction ~= mass ~= size^2 (calculated implicitly using density)
+# Wheel half-extents (unscaled): WHEEL_R along the wheel's local y axis
+# (the rolling direction used in Car.step), WHEEL_W along its local x axis.
+WHEEL_R = 27
+WHEEL_W = 14
+# Wheel anchor points on the hull (unscaled). Car.steer acts on the first two
+# entries, Car.gas on the last two.
+WHEELPOS = [(-55, +80), (+55, +80), (-55, -82), (+55, -82)]
+# The hull is built from four polygons (unscaled vertices).
+HULL_POLY1 = [(-60, +130), (+60, +130), (+60, +110), (-60, +110)]
+HULL_POLY2 = [(-15, +120), (+15, +120), (+20, +20), (-20, 20)]
+HULL_POLY3 = [
+    (+25, +20),
+    (+50, -10),
+    (+50, -40),
+    (+20, -90),
+    (-20, -90),
+    (-50, -40),
+    (-50, -10),
+    (-25, +20),
+]
+HULL_POLY4 = [(-50, -120), (+50, -120), (+50, -90), (-50, -90)]
+# WHEEL_COLOR: wheel body colour and skid marks left off grass.
+# WHEEL_WHITE: colour of the rotating stripe drawn on each wheel.
+# MUD_COLOR: skid marks left on grass.
+WHEEL_COLOR = (0, 0, 0)
+WHEEL_WHITE = (77, 77, 77)
+MUD_COLOR = (102, 102, 0)
+
+
+class Car:
+    """Top-down car made of one hull body and four wheel bodies in a Box2D world.
+
+    Each wheel is attached to the hull by a motorised, angle-limited revolute
+    joint. The wheel bodies carry extra per-wheel state as plain attributes
+    (``gas``, ``brake``, ``steer``, ``phase``, ``omega``, ``tiles``, skid info)
+    which :meth:`step` reads and updates.
+    """
+
+    def __init__(self, world, init_angle, init_x, init_y):
+        """Create the hull, the wheels and their joints inside ``world``.
+
+        Args:
+            world: Box2D world in which the bodies and joints are created.
+            init_angle: Initial angle given to the hull and to every wheel.
+            init_x: Initial x position of the hull.
+            init_y: Initial y position of the hull.
+        """
+        self.world: Box2D.b2World = world
+        # The hull is a single dynamic body with four polygon fixtures.
+        self.hull: Box2D.b2Body = self.world.CreateDynamicBody(
+            position=(init_x, init_y),
+            angle=init_angle,
+            fixtures=[
+                fixtureDef(
+                    shape=polygonShape(
+                        vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY1]
+                    ),
+                    density=1.0,
+                ),
+                fixtureDef(
+                    shape=polygonShape(
+                        vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY2]
+                    ),
+                    density=1.0,
+                ),
+                fixtureDef(
+                    shape=polygonShape(
+                        vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY3]
+                    ),
+                    density=1.0,
+                ),
+                fixtureDef(
+                    shape=polygonShape(
+                        vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY4]
+                    ),
+                    density=1.0,
+                ),
+            ],
+        )
+        # Colour components are in 0..1 here; draw() multiplies them by 255.
+        self.hull.color = (0.8, 0.0, 0.0)
+        self.wheels = []
+        self.fuel_spent = 0.0
+        WHEEL_POLY = [
+            (-WHEEL_W, +WHEEL_R),
+            (+WHEEL_W, +WHEEL_R),
+            (+WHEEL_W, -WHEEL_R),
+            (-WHEEL_W, -WHEEL_R),
+        ]
+        for wx, wy in WHEELPOS:
+            # NOTE(review): both branches evaluate to 1.0, so front and rear
+            # wheels currently get the same size; the conditional is a no-op.
+            front_k = 1.0 if wy > 0 else 1.0
+            w = self.world.CreateDynamicBody(
+                position=(init_x + wx * SIZE, init_y + wy * SIZE),
+                angle=init_angle,
+                fixtures=fixtureDef(
+                    shape=polygonShape(
+                        vertices=[
+                            (x * front_k * SIZE, y * front_k * SIZE)
+                            for x, y in WHEEL_POLY
+                        ]
+                    ),
+                    density=0.1,
+                    categoryBits=0x0020,
+                    maskBits=0x001,
+                    restitution=0.0,
+                ),
+            )
+            w.wheel_rad = front_k * WHEEL_R * SIZE
+            w.color = WHEEL_COLOR
+            # Control inputs, written by gas() / brake() / steer().
+            w.gas = 0.0
+            w.brake = 0.0
+            w.steer = 0.0
+            w.phase = 0.0  # wheel angle
+            w.omega = 0.0  # angular velocity
+            # Skid-mark bookkeeping used by step().
+            w.skid_start = None
+            w.skid_particle = None
+            # Joint motor is driven in step() to turn the wheel towards
+            # w.steer; the joint angle is limited to +-0.4.
+            rjd = revoluteJointDef(
+                bodyA=self.hull,
+                bodyB=w,
+                localAnchorA=(wx * SIZE, wy * SIZE),
+                localAnchorB=(0, 0),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=180 * 900 * SIZE * SIZE,
+                motorSpeed=0,
+                lowerAngle=-0.4,
+                upperAngle=+0.4,
+            )
+            w.joint = self.world.CreateJoint(rjd)
+            # Set of tile objects this wheel is associated with; only read
+            # here (in step()). Presumably filled by a contact listener
+            # elsewhere - confirm against the environment using this class.
+            w.tiles = set()
+            # The body is its own userData so code holding the body's
+            # userData can reach the attributes set above.
+            w.userData = w
+            self.wheels.append(w)
+        self.drawlist = self.wheels + [self.hull]
+        self.particles = []
+
+    def gas(self, gas):
+        """control: rear wheel drive
+
+        Only ``self.wheels[2:4]`` are affected. An increase is limited to 0.1
+        per call, while a decrease is applied in full.
+
+        Args:
+            gas (float): How much gas gets applied. Gets clipped between 0 and 1.
+        """
+        gas = np.clip(gas, 0, 1)
+        for w in self.wheels[2:4]:
+            diff = gas - w.gas
+            if diff > 0.1:
+                diff = 0.1  # gradually increase, but stop immediately
+            w.gas += diff
+
+    def brake(self, b):
+        """control: brake
+
+        The value is stored on every wheel unchanged (it is not clipped here).
+
+        Args:
+            b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation
+        """
+        for w in self.wheels:
+            w.brake = b
+
+    def steer(self, s):
+        """control: steer
+
+        Only ``self.wheels[0]`` and ``self.wheels[1]`` are affected.
+
+        Args:
+            s (-1..1): target position, it takes time to rotate steering wheel from side-to-side
+        """
+        self.wheels[0].steer = s
+        self.wheels[1].steer = s
+
+    def step(self, dt):
+        """Update steering, wheel rotation and tyre forces for every wheel.
+
+        For each wheel this sets the joint motor speed towards ``w.steer``,
+        derives the friction limit from ``w.tiles``, integrates ``w.omega``
+        (gas, brake, reaction of the tyre force), records skid marks and
+        applies the resulting force to the wheel body. It does not step the
+        Box2D world itself.
+
+        Args:
+            dt: Time step used as the integration factor.
+        """
+        for w in self.wheels:
+            # Steer each wheel
+            dir = np.sign(w.steer - w.joint.angle)
+            val = abs(w.steer - w.joint.angle)
+            # Motor speed grows with the remaining angle error, capped at 3.0.
+            w.joint.motorSpeed = dir * min(50.0 * val, 3.0)
+
+            # Position => friction_limit
+            grass = True
+            friction_limit = FRICTION_LIMIT * 0.6  # Grass friction if no tile
+            for tile in w.tiles:
+                friction_limit = max(
+                    friction_limit, FRICTION_LIMIT * tile.road_friction
+                )
+                grass = False
+
+            # Force
+            forw = w.GetWorldVector((0, 1))
+            side = w.GetWorldVector((1, 0))
+            v = w.linearVelocity
+            vf = forw[0] * v[0] + forw[1] * v[1]  # forward speed
+            vs = side[0] * v[0] + side[1] * v[1]  # side speed
+
+            # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
+            # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
+            # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
+
+            # add small coef not to divide by zero
+            w.omega += (
+                dt
+                * ENGINE_POWER
+                * w.gas
+                / WHEEL_MOMENT_OF_INERTIA
+                / (abs(w.omega) + 5.0)
+            )
+            self.fuel_spent += dt * ENGINE_POWER * w.gas
+
+            if w.brake >= 0.9:
+                # Hard braking locks the wheel.
+                w.omega = 0
+            elif w.brake > 0:
+                BRAKE_FORCE = 15  # radians per second
+                dir = -np.sign(w.omega)
+                val = BRAKE_FORCE * w.brake
+                if abs(val) > abs(w.omega):
+                    val = abs(w.omega)  # low speed => same as = 0
+                w.omega += dir * val
+            w.phase += w.omega * dt
+
+            vr = w.omega * w.wheel_rad  # rotating wheel speed
+            f_force = -vf + vr  # force direction is direction of speed difference
+            p_force = -vs
+
+            # Physically correct is to always apply friction_limit until speed is equal.
+            # But dt is finite, that will lead to oscillations if difference is already near zero.
+
+            # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
+            f_force *= 205000 * SIZE * SIZE
+            p_force *= 205000 * SIZE * SIZE
+            force = np.sqrt(np.square(f_force) + np.square(p_force))
+
+            # Skid trace
+            if abs(force) > 2.0 * friction_limit:
+                if (
+                    w.skid_particle
+                    and w.skid_particle.grass == grass
+                    and len(w.skid_particle.poly) < 30
+                ):
+                    # Extend the current skid mark (at most 30 points each).
+                    w.skid_particle.poly.append((w.position[0], w.position[1]))
+                elif w.skid_start is None:
+                    # First skidding step: only remember where it started.
+                    w.skid_start = w.position
+                else:
+                    # Second skidding step (or surface changed / mark full):
+                    # start a new particle from the remembered start point.
+                    w.skid_particle = self._create_particle(
+                        w.skid_start, w.position, grass
+                    )
+                    w.skid_start = None
+            else:
+                w.skid_start = None
+                w.skid_particle = None
+
+            # Clamp the force vector to the friction limit, keeping its direction.
+            if abs(force) > friction_limit:
+                f_force /= force
+                p_force /= force
+                force = friction_limit  # Correct physics here
+                f_force *= force
+                p_force *= force
+
+            # Reaction of the (possibly clamped) forward force on wheel rotation.
+            w.omega -= dt * f_force * w.wheel_rad / WHEEL_MOMENT_OF_INERTIA
+
+            w.ApplyForceToCenter(
+                (
+                    p_force * side[0] + f_force * forw[0],
+                    p_force * side[1] + f_force * forw[1],
+                ),
+                True,
+            )
+
+    def draw(self, surface, zoom, translation, angle, draw_particles=True):
+        """Draw skid marks, hull and wheels onto a pygame surface.
+
+        Every world-space point is rotated by ``angle`` (radians), multiplied
+        by ``zoom`` and shifted by ``translation`` before drawing.
+
+        Args:
+            surface: pygame surface to draw on.
+            zoom: Scale factor applied after rotation.
+            translation: Indexable pair added to the scaled x and y.
+            angle: Rotation in radians applied to every point.
+            draw_particles: When true, skid-mark particles are drawn as well.
+        """
+        import pygame.draw
+
+        if draw_particles:
+            for p in self.particles:
+                poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in p.poly]
+                poly = [
+                    (
+                        coords[0] * zoom + translation[0],
+                        coords[1] * zoom + translation[1],
+                    )
+                    for coords in poly
+                ]
+                pygame.draw.lines(
+                    surface, color=p.color, points=poly, width=2, closed=False
+                )
+
+        for obj in self.drawlist:
+            for f in obj.fixtures:
+                trans = f.body.transform
+                # Body-local vertices -> world -> rotated -> screen coordinates.
+                path = [trans * v for v in f.shape.vertices]
+                path = [(coords[0], coords[1]) for coords in path]
+                path = [pygame.math.Vector2(c).rotate_rad(angle) for c in path]
+                path = [
+                    (
+                        coords[0] * zoom + translation[0],
+                        coords[1] * zoom + translation[1],
+                    )
+                    for coords in path
+                ]
+                color = [int(c * 255) for c in obj.color]
+
+                pygame.draw.polygon(surface, color=color, points=path)
+
+                # Only wheel bodies carry a "phase" attribute; the hull is done.
+                if "phase" not in obj.__dict__:
+                    continue
+                # The stripe spans wheel angles phase .. phase + 1.2 rad and is
+                # drawn as a band between the two cosines along the wheel's
+                # local y axis.
+                a1 = obj.phase
+                a2 = obj.phase + 1.2  # radians
+                s1 = math.sin(a1)
+                s2 = math.sin(a2)
+                c1 = math.cos(a1)
+                c2 = math.cos(a2)
+                # Both stripe edges have positive sine: nothing is drawn
+                # (presumably the stripe is on the hidden side of the wheel).
+                if s1 > 0 and s2 > 0:
+                    continue
+                # An edge with positive sine is snapped to the wheel's end (+-1).
+                if s1 > 0:
+                    c1 = np.sign(c1)
+                if s2 > 0:
+                    c2 = np.sign(c2)
+                white_poly = [
+                    (-WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
+                    (+WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
+                    (+WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
+                    (-WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
+                ]
+                white_poly = [trans * v for v in white_poly]
+
+                white_poly = [(coords[0], coords[1]) for coords in white_poly]
+                white_poly = [
+                    pygame.math.Vector2(c).rotate_rad(angle) for c in white_poly
+                ]
+                white_poly = [
+                    (
+                        coords[0] * zoom + translation[0],
+                        coords[1] * zoom + translation[1],
+                    )
+                    for coords in white_poly
+                ]
+                pygame.draw.polygon(surface, color=WHEEL_WHITE, points=white_poly)
+
+    def _create_particle(self, point1, point2, grass):
+        """Create a two-point skid-mark particle and register it.
+
+        Args:
+            point1: Start point (indexable x, y).
+            point2: End point (indexable x, y).
+            grass: Whether the mark was made on grass; selects the colour.
+
+        Returns:
+            The new particle. ``self.particles`` keeps at most the 30 newest.
+        """
+
+        class Particle:
+            pass
+
+        p = Particle()
+        p.color = WHEEL_COLOR if not grass else MUD_COLOR
+        p.ttl = 1
+        p.poly = [(point1[0], point1[1]), (point2[0], point2[1])]
+        p.grass = grass
+        self.particles.append(p)
+        # Drop the oldest marks once more than 30 are stored.
+        while len(self.particles) > 30:
+            self.particles.pop(0)
+        return p
+
+    def destroy(self):
+        """Destroy the hull and wheel bodies in the world and drop the references."""
+        self.world.DestroyBody(self.hull)
+        self.hull = None
+        for w in self.wheels:
+            self.world.DestroyBody(w)
+        self.wheels = []
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/car_racing.py
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/car_racing.py
@ -0,0 +1,841 @@
+__credits__ = ["Andrea PIERRÉ"]
+
+import math
+from typing import Optional, Union
+
+import numpy as np
+
+import gymnasium as gym
+from gymnasium import spaces
+from gymnasium.envs.box2d.car_dynamics import Car
+from gymnasium.error import DependencyNotInstalled, InvalidAction
+from gymnasium.utils import EzPickle
+
+
+try:
+    import Box2D
+    from Box2D.b2 import contactListener, fixtureDef, polygonShape
+except ImportError as e:
+    raise DependencyNotInstalled(
+        "Box2D is not installed, run `pip install gymnasium[box2d]`"
+    ) from e
+
+try:
+    # As pygame is necessary for using the environment (reset and step) even without a render mode
+    #   therefore, pygame is a necessary import for the environment.
+    import pygame
+    from pygame import gfxdraw
+except ImportError as e:
+    raise DependencyNotInstalled(
+        "pygame is not installed, run `pip install gymnasium[box2d]`"
+    ) from e
+
+
+# Size of the observation image (see CarRacing.observation_space).
+STATE_W = 96  # less than Atari 160x192
+STATE_H = 96
+VIDEO_W = 600
+VIDEO_H = 400
+# Size of the pygame window / drawing surface.
+WINDOW_W = 1000
+WINDOW_H = 800
+
+SCALE = 6.0  # Track scale
+TRACK_RAD = 900 / SCALE  # Track is heavily morphed circle with this radius
+PLAYFIELD = 2000 / SCALE  # Game over boundary
+FPS = 50  # Frames per second
+ZOOM = 2.7  # Camera zoom
+ZOOM_FOLLOW = True  # Set to False for fixed view (don't use zoom)
+
+
+# Track generation parameters (see CarRacing._create_track):
+#   TRACK_DETAIL_STEP - distance advanced per generated track point
+#   TRACK_TURN_RATE   - maximum heading change per generated track point
+#   TRACK_WIDTH       - offset from the centre line to each road edge
+#   BORDER            - width of the red/white border strip
+#   BORDER_MIN_COUNT  - consecutive same-direction turning segments required
+#                       before a border is drawn
+TRACK_DETAIL_STEP = 21 / SCALE
+TRACK_TURN_RATE = 0.31
+TRACK_WIDTH = 40 / SCALE
+BORDER = 8 / SCALE
+BORDER_MIN_COUNT = 4
+GRASS_DIM = PLAYFIELD / 20.0
+# Largest of the three dimensions above, times sqrt(2) * ZOOM * SCALE.
+MAX_SHAPE_DIM = (
+    max(GRASS_DIM, TRACK_WIDTH, TRACK_DETAIL_STEP) * math.sqrt(2) * ZOOM * SCALE
+)
+
+
+class FrictionDetector(contactListener):
+    """Contact listener that tracks which road tiles each wheel touches.
+
+    On every begin/end contact between a road tile (an object with a
+    ``road_friction`` attribute) and an object owning a ``tiles`` set, the
+    set is updated. The first time a tile is touched the environment is
+    rewarded and its visited-tile counter is incremented.
+    """
+
+    def __init__(self, env, lap_complete_percent):
+        """Store the environment and the lap completion threshold.
+
+        Args:
+            env: Environment whose ``reward``, ``tile_visited_count``,
+                ``track``, ``road_color`` and ``new_lap`` are read or updated.
+            lap_complete_percent: Fraction of visited tiles that must be
+                exceeded before reaching tile 0 counts as a new lap.
+        """
+        contactListener.__init__(self)
+        self.env = env
+        self.lap_complete_percent = lap_complete_percent
+
+    def BeginContact(self, contact):
+        """Box2D callback: two fixtures started touching."""
+        self._contact(contact, True)
+
+    def EndContact(self, contact):
+        """Box2D callback: two fixtures stopped touching."""
+        self._contact(contact, False)
+
+    def _contact(self, contact, begin):
+        """Handle a begin (``begin=True``) or end (``begin=False``) contact."""
+        data_a = contact.fixtureA.body.userData
+        data_b = contact.fixtureB.body.userData
+
+        # Work out which side is the road tile; if both qualify, the second
+        # ordering (B as the tile) wins.
+        tile, obj = None, None
+        for candidate, other in ((data_a, data_b), (data_b, data_a)):
+            if candidate and "road_friction" in candidate.__dict__:
+                tile, obj = candidate, other
+        if not tile:
+            return
+
+        # inherit tile color from env
+        tile.color[:] = self.env.road_color
+        if not obj or "tiles" not in obj.__dict__:
+            return
+
+        if not begin:
+            obj.tiles.remove(tile)
+            return
+
+        obj.tiles.add(tile)
+        if tile.road_visited:
+            return
+
+        # First visit of this tile: reward it and count it.
+        env = self.env
+        tile.road_visited = True
+        env.reward += 1000.0 / len(env.track)
+        env.tile_visited_count += 1
+
+        # Lap is considered completed if enough % of the track was covered
+        if (
+            tile.idx == 0
+            and env.tile_visited_count / len(env.track) > self.lap_complete_percent
+        ):
+            env.new_lap = True
+
+
+class CarRacing(gym.Env, EzPickle):
+    """
+    ## Description
+    The easiest control task to learn from pixels - a top-down
+    racing environment. The generated track is random every episode.
+
+    Some indicators are shown at the bottom of the window along with the
+    state RGB buffer. From left to right: true speed, four ABS sensors,
+    steering wheel position, and gyroscope.
+    To play yourself (it's rather fast for humans), type:
+    ```
+    python gymnasium/envs/box2d/car_racing.py
+    ```
+    Remember: it's a powerful rear-wheel drive car - don't press the accelerator
+    and turn at the same time.
+
+    ## Action Space
+    If continuous there are 3 actions :
+    - 0: steering, -1 is full left, +1 is full right
+    - 1: gas
+    - 2: braking
+
+    If discrete there are 5 actions:
+    - 0: do nothing
+    - 1: steer left
+    - 2: steer right
+    - 3: gas
+    - 4: brake
+
+    ## Observation Space
+
+    A top-down 96x96 RGB image of the car and race track.
+
+    ## Rewards
+    The reward is -0.1 every frame and +1000/N for every track tile visited,
+    where N is the total number of tiles in the track. For example,
+    if you have finished in 732 frames, your reward is
+    1000 - 0.1*732 = 926.8 points.
+
+    ## Starting State
+    The car starts at rest in the center of the road.
+
+    ## Episode Termination
+    The episode finishes when all the tiles are visited. The car can also go
+    outside the playfield - that is, far off the track, in which case it will
+    receive -100 reward and die.
+
+    ## Arguments
+    `lap_complete_percent` dictates the percentage of tiles that must be visited by
+    the agent before a lap is considered complete.
+
+    Passing `domain_randomize=True` enables the domain randomized variant of the environment.
+    In this scenario, the background and track colours are different on every reset.
+
+    Passing `continuous=False` converts the environment to use discrete action space.
+    The discrete action space has 5 actions: [do nothing, left, right, gas, brake].
+
+    ## Reset Arguments
+    Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
+    Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment.
+    `domain_randomize` must be `True` on init for this argument to work.
+    Example usage:
+    ```python
+    import gymnasium as gym
+    env = gym.make("CarRacing-v1", domain_randomize=True)
+
+    # normal reset, this changes the colour scheme by default
+    env.reset()
+
+    # reset with colour scheme change
+    env.reset(options={"randomize": True})
+
+    # reset with no colour scheme change
+    env.reset(options={"randomize": False})
+    ```
+
+    ## Version History
+    - v1: Change track completion logic and add domain randomization (0.24.0)
+    - v0: Original version
+
+    ## References
+    - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.
+
+    ## Credits
+    Created by Oleg Klimov
+    """
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "state_pixels",
+        ],
+        "render_fps": FPS,
+    }
+
+    def __init__(
+        self,
+        render_mode: Optional[str] = None,
+        verbose: bool = False,
+        lap_complete_percent: float = 0.95,
+        domain_randomize: bool = False,
+        continuous: bool = True,
+    ):
+        """Set up the Box2D world, the colours and the action/observation spaces.
+
+        Args:
+            render_mode: Render mode stored on the environment (may be ``None``).
+            verbose: When true, track generation prints progress messages.
+            lap_complete_percent: Fraction of tiles that must be visited
+                before a lap is considered complete.
+            domain_randomize: When true, colours are drawn at random.
+            continuous: Selects the continuous (Box) action space instead of
+                the discrete one.
+        """
+        EzPickle.__init__(
+            self,
+            render_mode,
+            verbose,
+            lap_complete_percent,
+            domain_randomize,
+            continuous,
+        )
+
+        # Configuration. _init_colors() reads domain_randomize, so it has to
+        # run after these assignments.
+        self.continuous = continuous
+        self.domain_randomize = domain_randomize
+        self.lap_complete_percent = lap_complete_percent
+        self.verbose = verbose
+        self._init_colors()
+
+        # Physics world with no gravity; the listener is also stored on self
+        # so a reference to it is kept.
+        self.contactListener_keepref = FrictionDetector(self, self.lap_complete_percent)
+        self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
+        self.fd_tile = fixtureDef(
+            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
+        )
+
+        # Rendering state, created lazily.
+        self.screen: Optional[pygame.Surface] = None
+        self.surf = None
+        self.clock = None
+        self.isopen = True
+        self.invisible_state_window = None
+        self.invisible_video_window = None
+
+        # Episode state.
+        self.road = None
+        self.car: Optional[Car] = None
+        self.reward = 0.0
+        self.prev_reward = 0.0
+        self.new_lap = False
+
+        # This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
+        #   or normalised however this is not possible here so ignore
+        if not self.continuous:
+            # do nothing, left, right, gas, brake
+            self.action_space = spaces.Discrete(5)
+        else:
+            # steer, gas, brake
+            action_low = np.array([-1, 0, 0], dtype=np.float32)
+            action_high = np.array([+1, +1, +1], dtype=np.float32)
+            self.action_space = spaces.Box(action_low, action_high)
+
+        self.observation_space = spaces.Box(
+            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
+        )
+
+        self.render_mode = render_mode
+
+    def _destroy(self):
+        """Remove all road tiles and the car from the world, if a road exists."""
+        if self.road:
+            for tile_body in self.road:
+                self.world.DestroyBody(tile_body)
+            self.road = []
+            assert self.car is not None
+            self.car.destroy()
+
+    def _init_colors(self):
+        """Set ``road_color``, ``bg_color`` and ``grass_color``.
+
+        Without domain randomisation the fixed default colours are used.
+        Otherwise road and background are drawn uniformly from [0, 210) per
+        channel, and the grass is the background with one random channel
+        raised by 20.
+        """
+        if not self.domain_randomize:
+            # default colours
+            self.road_color = np.array([102, 102, 102])
+            self.bg_color = np.array([102, 204, 102])
+            self.grass_color = np.array([102, 230, 102])
+            return
+
+        # domain randomize the bg and grass colour
+        rng = self.np_random
+        self.road_color = rng.uniform(0, 210, size=3)
+        self.bg_color = rng.uniform(0, 210, size=3)
+
+        grass = np.copy(self.bg_color)
+        channel = rng.integers(3)
+        grass[channel] += 20
+        self.grass_color = grass
+
+    def _reinit_colors(self, randomize):
+        """Re-draw the random colour scheme when ``randomize`` is truthy.
+
+        Args:
+            randomize: When truthy, new road/background/grass colours are
+                drawn; otherwise the current colours are kept.
+
+        Raises:
+            AssertionError: If the environment was not created with
+                ``domain_randomize=True``.
+        """
+        assert (
+            self.domain_randomize
+        ), "domain_randomize must be True to use this function."
+
+        if randomize:
+            # domain_randomize is True here, so _init_colors() takes its
+            # randomised branch; reusing it avoids a second copy of the same
+            # sampling code drifting out of sync.
+            self._init_colors()
+
+    def _create_track(self):
+        """Generate a random closed track and create its tile bodies.
+
+        Random checkpoints are placed around a circle, a path is traced from
+        checkpoint to checkpoint, one closed loop is cut out of that path and
+        a static sensor body is created for every segment of the loop.
+
+        On success ``self.road`` holds the tile bodies, ``self.road_poly`` is
+        extended with the polygons to draw and ``self.track`` holds the
+        ``(alpha, beta, x, y)`` points of the loop. ``self.road_poly`` must
+        already exist (``reset`` creates it).
+
+        Returns:
+            ``True`` on success; ``False`` if no closed loop was found or its
+            two ends do not join well enough.
+        """
+        CHECKPOINTS = 12
+
+        # Create checkpoints
+        checkpoints = []
+        for c in range(CHECKPOINTS):
+            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
+            alpha = 2 * math.pi * c / CHECKPOINTS + noise
+            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
+
+            # First and last checkpoints are fixed (no noise, radius
+            # 1.5 * TRACK_RAD); start_alpha lies half a sector before angle 0.
+            if c == 0:
+                alpha = 0
+                rad = 1.5 * TRACK_RAD
+            if c == CHECKPOINTS - 1:
+                alpha = 2 * math.pi * c / CHECKPOINTS
+                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
+                rad = 1.5 * TRACK_RAD
+
+            checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
+        self.road = []
+
+        # Go from one checkpoint to another to create track
+        x, y, beta = 1.5 * TRACK_RAD, 0, 0
+        dest_i = 0
+        laps = 0
+        track = []
+        no_freeze = 2500  # upper bound on the number of generated points
+        visited_other_side = False
+        while True:
+            # Polar angle of the current point; a lap is counted each time
+            # the angle becomes positive again after having been negative.
+            alpha = math.atan2(y, x)
+            if visited_other_side and alpha > 0:
+                laps += 1
+                visited_other_side = False
+            if alpha < 0:
+                visited_other_side = True
+                alpha += 2 * math.pi
+
+            while True:  # Find destination from checkpoints
+                failed = True
+
+                while True:
+                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
+                    if alpha <= dest_alpha:
+                        failed = False
+                        break
+                    dest_i += 1
+                    if dest_i % len(checkpoints) == 0:
+                        break
+
+                if not failed:
+                    break
+
+                # Wrapped past the last checkpoint: retry with alpha one turn back.
+                alpha -= 2 * math.pi
+                continue
+
+            # (r1x, r1y) is the unit vector at angle beta; (p1x, p1y) is
+            # perpendicular to it and is the direction the path advances in.
+            r1x = math.cos(beta)
+            r1y = math.sin(beta)
+            p1x = -r1y
+            p1y = r1x
+            dest_dx = dest_x - x  # vector towards destination
+            dest_dy = dest_y - y
+            # destination vector projected on rad:
+            proj = r1x * dest_dx + r1y * dest_dy
+            # Keep beta within 1.5*pi of alpha.
+            while beta - alpha > 1.5 * math.pi:
+                beta -= 2 * math.pi
+            while beta - alpha < -1.5 * math.pi:
+                beta += 2 * math.pi
+            prev_beta = beta
+            proj *= SCALE
+            # Turn towards the destination, by at most TRACK_TURN_RATE per step.
+            if proj > 0.3:
+                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
+            if proj < -0.3:
+                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
+            x += p1x * TRACK_DETAIL_STEP
+            y += p1y * TRACK_DETAIL_STEP
+            # Stored beta is the average of the heading before and after the turn.
+            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
+            if laps > 4:
+                break
+            no_freeze -= 1
+            if no_freeze == 0:
+                break
+
+        # Find closed loop range i1..i2, first loop should be ignored, second is OK
+        i1, i2 = -1, -1
+        i = len(track)
+        # Scan backwards for the last two crossings of start_alpha.
+        while True:
+            i -= 1
+            if i == 0:
+                return False  # Failed
+            pass_through_start = (
+                track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
+            )
+            if pass_through_start and i2 == -1:
+                i2 = i
+            elif pass_through_start and i1 == -1:
+                i1 = i
+                break
+        if self.verbose:
+            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
+        assert i1 != -1
+        assert i2 != -1
+
+        track = track[i1 : i2 - 1]
+
+        first_beta = track[0][1]
+        first_perp_x = math.cos(first_beta)
+        first_perp_y = math.sin(first_beta)
+        # Length of perpendicular jump to put together head and tail
+        well_glued_together = np.sqrt(
+            np.square(first_perp_x * (track[0][2] - track[-1][2]))
+            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
+        )
+        if well_glued_together > TRACK_DETAIL_STEP:
+            return False
+
+        # Red-white border on hard turns
+        border = [False] * len(track)
+        for i in range(len(track)):
+            # A point qualifies when each of the last BORDER_MIN_COUNT
+            # segments turns by more than 20% of TRACK_TURN_RATE, all in the
+            # same direction. Negative indices wrap around the closed loop.
+            good = True
+            oneside = 0
+            for neg in range(BORDER_MIN_COUNT):
+                beta1 = track[i - neg - 0][1]
+                beta2 = track[i - neg - 1][1]
+                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
+                oneside += np.sign(beta1 - beta2)
+            good &= abs(oneside) == BORDER_MIN_COUNT
+            border[i] = good
+        # Extend each border backwards over the preceding points.
+        for i in range(len(track)):
+            for neg in range(BORDER_MIN_COUNT):
+                border[i - neg] |= border[i]
+
+        # Create tiles
+        for i in range(len(track)):
+            # Each tile is the quad between this point and the previous one
+            # (index -1 for i == 0, which closes the loop).
+            alpha1, beta1, x1, y1 = track[i]
+            alpha2, beta2, x2, y2 = track[i - 1]
+            road1_l = (
+                x1 - TRACK_WIDTH * math.cos(beta1),
+                y1 - TRACK_WIDTH * math.sin(beta1),
+            )
+            road1_r = (
+                x1 + TRACK_WIDTH * math.cos(beta1),
+                y1 + TRACK_WIDTH * math.sin(beta1),
+            )
+            road2_l = (
+                x2 - TRACK_WIDTH * math.cos(beta2),
+                y2 - TRACK_WIDTH * math.sin(beta2),
+            )
+            road2_r = (
+                x2 + TRACK_WIDTH * math.cos(beta2),
+                y2 + TRACK_WIDTH * math.sin(beta2),
+            )
+            vertices = [road1_l, road1_r, road2_r, road2_l]
+            # The shared fixture definition is re-used with new vertices per tile.
+            self.fd_tile.shape.vertices = vertices
+            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
+            t.userData = t
+            # Slight brightness variation repeating every three tiles.
+            c = 0.01 * (i % 3) * 255
+            t.color = self.road_color + c
+            t.road_visited = False
+            t.road_friction = 1.0
+            t.idx = i
+            t.fixtures[0].sensor = True
+            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
+            self.road.append(t)
+            if border[i]:
+                # The side on which the border is drawn follows the sign of
+                # the heading change between the two points.
+                side = np.sign(beta2 - beta1)
+                b1_l = (
+                    x1 + side * TRACK_WIDTH * math.cos(beta1),
+                    y1 + side * TRACK_WIDTH * math.sin(beta1),
+                )
+                b1_r = (
+                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
+                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
+                )
+                b2_l = (
+                    x2 + side * TRACK_WIDTH * math.cos(beta2),
+                    y2 + side * TRACK_WIDTH * math.sin(beta2),
+                )
+                b2_r = (
+                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
+                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
+                )
+                # Alternate white and red border segments.
+                self.road_poly.append(
+                    (
+                        [b1_l, b1_r, b2_r, b2_l],
+                        (255, 255, 255) if i % 2 == 0 else (255, 0, 0),
+                    )
+                )
+        self.track = track
+        return True
+
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ):
+        """Start a new episode on a freshly generated track.
+
+        Args:
+            seed: Seed forwarded to ``super().reset``.
+            options: Optional dict. With ``domain_randomize`` enabled, the key
+                ``"randomize"`` (default ``True``) controls whether a new
+                colour scheme is drawn.
+
+        Returns:
+            A tuple ``(observation, info)``: the observation produced by an
+            initial ``step(None)`` and an empty info dict.
+        """
+        super().reset(seed=seed)
+        self._destroy()
+        # A fresh listener is installed on every reset; it is also stored on
+        # the world object so a reference to it is kept.
+        self.world.contactListener_bug_workaround = FrictionDetector(
+            self, self.lap_complete_percent
+        )
+        self.world.contactListener = self.world.contactListener_bug_workaround
+        self.reward = 0.0
+        self.prev_reward = 0.0
+        self.tile_visited_count = 0
+        self.t = 0.0
+        self.new_lap = False
+        self.road_poly = []
+
+        if self.domain_randomize:
+            randomize = True
+            if isinstance(options, dict):
+                randomize = options.get("randomize", True)
+
+            self._reinit_colors(randomize)
+
+        # Track generation can fail; retry until it succeeds.
+        while not self._create_track():
+            if self.verbose:
+                # The trailing space keeps the two literals from fusing into
+                # "manyinstances".
+                print(
+                    "retry to generate track (normal if there are not many "
+                    "instances of this message)"
+                )
+        # The car starts at the first track point: (beta, x, y).
+        self.car = Car(self.world, *self.track[0][1:4])
+
+        if self.render_mode == "human":
+            self.render()
+        return self.step(None)[0], {}
+
+    def step(self, action: Union[np.ndarray, int]):
+        """Apply ``action``, advance the simulation by one frame and score it.
+
+        Args:
+            action: In continuous mode an indexable ``(steer, gas, brake)``;
+                in discrete mode an integer in the action space. ``None``
+                (used by ``reset``) advances the simulation without applying
+                controls or changing the reward.
+
+        Returns:
+            ``(state, step_reward, terminated, truncated, info)`` where
+            ``info`` is an empty dict.
+
+        Raises:
+            InvalidAction: In discrete mode, if ``action`` is not contained in
+                the action space.
+        """
+        assert self.car is not None
+        has_action = action is not None
+
+        if has_action and self.continuous:
+            self.car.steer(-action[0])
+            self.car.gas(action[1])
+            self.car.brake(action[2])
+        elif has_action:
+            if not self.action_space.contains(action):
+                raise InvalidAction(
+                    f"you passed the invalid action `{action}`. "
+                    f"The supported action_space is `{self.action_space}`"
+                )
+            # Each comparison is 0 or 1, so only the matching control is set.
+            self.car.steer(-0.6 * (action == 1) + 0.6 * (action == 2))
+            self.car.gas(0.2 * (action == 3))
+            self.car.brake(0.8 * (action == 4))
+
+        frame_dt = 1.0 / FPS
+        self.car.step(frame_dt)
+        self.world.Step(frame_dt, 6 * 30, 2 * 30)
+        self.t += frame_dt
+
+        self.state = self._render("state_pixels")
+
+        step_reward = 0
+        terminated = False
+        truncated = False
+        if has_action:  # First step without action, called from reset()
+            self.reward -= 0.1
+            # We actually don't want to count fuel spent, we want car to be faster.
+            # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
+            self.car.fuel_spent = 0.0
+            step_reward = self.reward - self.prev_reward
+            self.prev_reward = self.reward
+
+            all_tiles_visited = self.tile_visited_count == len(self.track)
+            if all_tiles_visited or self.new_lap:
+                # Truncation due to finishing lap
+                # This should not be treated as a failure
+                # but like a timeout
+                truncated = True
+
+            x, y = self.car.hull.position
+            left_playfield = abs(x) > PLAYFIELD or abs(y) > PLAYFIELD
+            if left_playfield:
+                terminated = True
+                step_reward = -100
+
+        if self.render_mode == "human":
+            self.render()
+        return self.state, step_reward, terminated, truncated, {}
+
+    def render(self):
+        if self.render_mode is None:
+            assert self.spec is not None
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+        else:
+            return self._render(self.render_mode)
+
+    def _render(self, mode: str):
+        """Draw the current frame and return it in the form required by ``mode``.
+
+        Args:
+            mode: Must be one of ``self.metadata["render_modes"]``.
+
+        Returns:
+            ``None`` for ``"human"`` (the frame is shown in the window) and when
+            ``reset()`` has not been called yet; an image array scaled to
+            ``(VIDEO_W, VIDEO_H)`` for ``"rgb_array"`` or to
+            ``(STATE_W, STATE_H)`` for ``"state_pixels"``; ``self.isopen`` for
+            any other accepted mode.
+        """
+        assert mode in self.metadata["render_modes"]
+
+        pygame.font.init()
+        # The window is only created for human mode; the clock is created for
+        # every mode.
+        if self.screen is None and mode == "human":
+            pygame.init()
+            pygame.display.init()
+            self.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H))
+        if self.clock is None:
+            self.clock = pygame.time.Clock()
+
+        if "t" not in self.__dict__:
+            return  # reset() not called yet
+
+        # A fresh off-screen surface is drawn on every frame.
+        self.surf = pygame.Surface((WINDOW_W, WINDOW_H))
+
+        assert self.car is not None
+        # computing transformations
+        angle = -self.car.hull.angle
+        # Animating first second zoom.
+        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
+        scroll_x = -(self.car.hull.position[0]) * zoom
+        scroll_y = -(self.car.hull.position[1]) * zoom
+        trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle)
+        # Anchor the car at the horizontal centre, a quarter of the window
+        # height along the vertical axis (before the flip below).
+        trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1])
+
+        self._render_road(zoom, trans, angle)
+        # The last argument is False for the state-pixel modes and True otherwise.
+        self.car.draw(
+            self.surf,
+            zoom,
+            trans,
+            angle,
+            mode not in ["state_pixels_list", "state_pixels"],
+        )
+
+        # Flip vertically (second flag) before overlaying the dashboard.
+        self.surf = pygame.transform.flip(self.surf, False, True)
+
+        # showing stats
+        self._render_indicators(WINDOW_W, WINDOW_H)
+
+        # Accumulated reward, drawn as white text on black over the dashboard.
+        font = pygame.font.Font(pygame.font.get_default_font(), 42)
+        text = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0))
+        text_rect = text.get_rect()
+        text_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0)
+        self.surf.blit(text, text_rect)
+
+        if mode == "human":
+            pygame.event.pump()
+            # Throttle to the frame rate declared in the metadata.
+            self.clock.tick(self.metadata["render_fps"])
+            assert self.screen is not None
+            self.screen.fill(0)
+            self.screen.blit(self.surf, (0, 0))
+            pygame.display.flip()
+        elif mode == "rgb_array":
+            return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H))
+        elif mode == "state_pixels":
+            return self._create_image_array(self.surf, (STATE_W, STATE_H))
+        else:
+            return self.isopen
+
+    def _render_road(self, zoom, translation, angle):
+        bounds = PLAYFIELD
+        field = [
+            (bounds, bounds),
+            (bounds, -bounds),
+            (-bounds, -bounds),
+            (-bounds, bounds),
+        ]
+
+        # draw background
+        self._draw_colored_polygon(
+            self.surf, field, self.bg_color, zoom, translation, angle, clip=False
+        )
+
+        # draw grass patches
+        grass = []
+        for x in range(-20, 20, 2):
+            for y in range(-20, 20, 2):
+                grass.append(
+                    [
+                        (GRASS_DIM * x + GRASS_DIM, GRASS_DIM * y + 0),
+                        (GRASS_DIM * x + 0, GRASS_DIM * y + 0),
+                        (GRASS_DIM * x + 0, GRASS_DIM * y + GRASS_DIM),
+                        (GRASS_DIM * x + GRASS_DIM, GRASS_DIM * y + GRASS_DIM),
+                    ]
+                )
+        for poly in grass:
+            self._draw_colored_polygon(
+                self.surf, poly, self.grass_color, zoom, translation, angle
+            )
+
+        # draw road
+        for poly, color in self.road_poly:
+            # converting to pixel coordinates
+            poly = [(p[0], p[1]) for p in poly]
+            color = [int(c) for c in color]
+            self._draw_colored_polygon(self.surf, poly, color, zoom, translation, angle)
+
+    def _render_indicators(self, W, H):
+        s = W / 40.0
+        h = H / 40.0
+        color = (0, 0, 0)
+        polygon = [(W, H), (W, H - 5 * h), (0, H - 5 * h), (0, H)]
+        pygame.draw.polygon(self.surf, color=color, points=polygon)
+
+        def vertical_ind(place, val):
+            return [
+                (place * s, H - (h + h * val)),
+                ((place + 1) * s, H - (h + h * val)),
+                ((place + 1) * s, H - h),
+                ((place + 0) * s, H - h),
+            ]
+
+        def horiz_ind(place, val):
+            return [
+                ((place + 0) * s, H - 4 * h),
+                ((place + val) * s, H - 4 * h),
+                ((place + val) * s, H - 2 * h),
+                ((place + 0) * s, H - 2 * h),
+            ]
+
+        assert self.car is not None
+        true_speed = np.sqrt(
+            np.square(self.car.hull.linearVelocity[0])
+            + np.square(self.car.hull.linearVelocity[1])
+        )
+
+        # simple wrapper to render if the indicator value is above a threshold
+        def render_if_min(value, points, color):
+            if abs(value) > 1e-4:
+                pygame.draw.polygon(self.surf, points=points, color=color)
+
+        render_if_min(true_speed, vertical_ind(5, 0.02 * true_speed), (255, 255, 255))
+        # ABS sensors
+        render_if_min(
+            self.car.wheels[0].omega,
+            vertical_ind(7, 0.01 * self.car.wheels[0].omega),
+            (0, 0, 255),
+        )
+        render_if_min(
+            self.car.wheels[1].omega,
+            vertical_ind(8, 0.01 * self.car.wheels[1].omega),
+            (0, 0, 255),
+        )
+        render_if_min(
+            self.car.wheels[2].omega,
+            vertical_ind(9, 0.01 * self.car.wheels[2].omega),
+            (51, 0, 255),
+        )
+        render_if_min(
+            self.car.wheels[3].omega,
+            vertical_ind(10, 0.01 * self.car.wheels[3].omega),
+            (51, 0, 255),
+        )
+
+        render_if_min(
+            self.car.wheels[0].joint.angle,
+            horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle),
+            (0, 255, 0),
+        )
+        render_if_min(
+            self.car.hull.angularVelocity,
+            horiz_ind(30, -0.8 * self.car.hull.angularVelocity),
+            (255, 0, 0),
+        )
+
+    def _draw_colored_polygon(
+        self, surface, poly, color, zoom, translation, angle, clip=True
+    ):
+        poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in poly]
+        poly = [
+            (c[0] * zoom + translation[0], c[1] * zoom + translation[1]) for c in poly
+        ]
+        # This checks if the polygon is out of bounds of the screen, and we skip drawing if so.
+        # Instead of calculating exactly if the polygon and screen overlap,
+        # we simply check if the polygon is in a larger bounding box whose dimension
+        # is greater than the screen by MAX_SHAPE_DIM, which is the maximum
+        # diagonal length of an environment object
+        if not clip or any(
+            (-MAX_SHAPE_DIM <= coord[0] <= WINDOW_W + MAX_SHAPE_DIM)
+            and (-MAX_SHAPE_DIM <= coord[1] <= WINDOW_H + MAX_SHAPE_DIM)
+            for coord in poly
+        ):
+            gfxdraw.aapolygon(self.surf, poly, color)
+            gfxdraw.filled_polygon(self.surf, poly, color)
+
+    def _create_image_array(self, screen, size):
+        scaled_screen = pygame.transform.smoothscale(screen, size)
+        return np.transpose(
+            np.array(pygame.surfarray.pixels3d(scaled_screen)), axes=(1, 0, 2)
+        )
+
+    def close(self):
+        if self.screen is not None:
+            pygame.display.quit()
+            self.isopen = False
+            pygame.quit()
+
+
+# Manual-play demo: drive the car with the arrow keys, Return restarts the
+# episode, Escape or closing the window quits.
+if __name__ == "__main__":
+    # Shared continuous action [steer, gas, brake], mutated by the key handler.
+    a = np.array([0.0, 0.0, 0.0])
+
+    def register_input():
+        # Translate pending pygame events into the shared action `a` and the
+        # module-level `quit` / `restart` flags.
+        global quit, restart
+        for event in pygame.event.get():
+            if event.type == pygame.KEYDOWN:
+                if event.key == pygame.K_LEFT:
+                    a[0] = -1.0
+                if event.key == pygame.K_RIGHT:
+                    a[0] = +1.0
+                if event.key == pygame.K_UP:
+                    a[1] = +1.0
+                if event.key == pygame.K_DOWN:
+                    a[2] = +0.8  # set 1.0 for wheels to block to zero rotation
+                if event.key == pygame.K_RETURN:
+                    restart = True
+                if event.key == pygame.K_ESCAPE:
+                    quit = True
+
+            # Releasing a key zeroes the control it was driving.
+            if event.type == pygame.KEYUP:
+                if event.key == pygame.K_LEFT:
+                    a[0] = 0
+                if event.key == pygame.K_RIGHT:
+                    a[0] = 0
+                if event.key == pygame.K_UP:
+                    a[1] = 0
+                if event.key == pygame.K_DOWN:
+                    a[2] = 0
+
+            if event.type == pygame.QUIT:
+                quit = True
+
+    env = CarRacing(render_mode="human")
+
+    # NOTE: `quit` shadows the interactive builtin of the same name.
+    quit = False
+    # Outer loop: one iteration per episode.
+    while not quit:
+        env.reset()
+        total_reward = 0.0
+        steps = 0
+        restart = False
+        # Inner loop: one iteration per simulation step.
+        while True:
+            register_input()
+            s, r, terminated, truncated, info = env.step(a)
+            total_reward += r
+            # Print progress every 200 steps and on the final step.
+            if steps % 200 == 0 or terminated or truncated:
+                print("\naction " + str([f"{x:+0.2f}" for x in a]))
+                print(f"step {steps} total_reward {total_reward:+0.2f}")
+            steps += 1
+            if terminated or truncated or restart or quit:
+                break
+    env.close()
--- a/rl/Lib/site-packages/gymnasium/envs/box2d/lunar_lander.py
+++ b/rl/Lib/site-packages/gymnasium/envs/box2d/lunar_lander.py
@ -0,0 +1,894 @@
+__credits__ = ["Andrea PIERRÉ"]
+
+import math
+import warnings
+from typing import TYPE_CHECKING, Optional
+
+import numpy as np
+
+import gymnasium as gym
+from gymnasium import error, spaces
+from gymnasium.error import DependencyNotInstalled
+from gymnasium.utils import EzPickle, colorize
+from gymnasium.utils.step_api_compatibility import step_api_compatibility
+
+
+try:
+    import Box2D
+    from Box2D.b2 import (
+        circleShape,
+        contactListener,
+        edgeShape,
+        fixtureDef,
+        polygonShape,
+        revoluteJointDef,
+    )
+except ImportError as e:
+    raise DependencyNotInstalled(
+        "Box2D is not installed, run `pip install gymnasium[box2d]`"
+    ) from e
+
+
+if TYPE_CHECKING:
+    import pygame
+
+
+# Published as LunarLander.metadata["render_fps"].
+FPS = 50
+# Pixel quantities below are divided by SCALE to obtain Box2D world units.
+SCALE = 30.0  # affects how fast-paced the game is, forces should be adjusted as well
+
+# Multiplier applied to the main-engine impulse in step().
+MAIN_ENGINE_POWER = 13.0
+# NOTE(review): presumably the side-engine counterpart -- its use is outside this excerpt.
+SIDE_ENGINE_POWER = 0.6
+
+# Bound of the uniform random force applied to the lander in reset().
+INITIAL_RANDOM = 1000.0  # Set 1500 to make game harder
+
+# Hull outline in pixels (divided by SCALE when the Box2D shape is built).
+LANDER_POLY = [(-14, +17), (-17, 0), (-17, -10), (+17, -10), (+17, 0), (+14, +17)]
+# Leg placement relative to the hull, in pixels.
+LEG_AWAY = 20
+LEG_DOWN = 18
+# Passed (divided by SCALE) as the `box` of each leg shape.
+LEG_W, LEG_H = 2, 8
+# Used as maxMotorTorque of the leg joints.
+LEG_SPRING_TORQUE = 40
+
+# Offsets, in pixels, used when positioning the side-engine impulse.
+SIDE_ENGINE_HEIGHT = 14
+SIDE_ENGINE_AWAY = 12
+MAIN_ENGINE_Y_LOCATION = (
+    4  # The Y location of the main engine on the body of the Lander.
+)
+
+# Viewport size in pixels.
+VIEWPORT_W = 600
+VIEWPORT_H = 400
+
+
+class ContactDetector(contactListener):
+    def __init__(self, env):
+        contactListener.__init__(self)
+        self.env = env
+
+    def BeginContact(self, contact):
+        if (
+            self.env.lander == contact.fixtureA.body
+            or self.env.lander == contact.fixtureB.body
+        ):
+            self.env.game_over = True
+        for i in range(2):
+            if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
+                self.env.legs[i].ground_contact = True
+
+    def EndContact(self, contact):
+        for i in range(2):
+            if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
+                self.env.legs[i].ground_contact = False
+
+
+class LunarLander(gym.Env, EzPickle):
+    """
+    ## Description
+    This environment is a classic rocket trajectory optimization problem.
+    According to Pontryagin's maximum principle, it is optimal to fire the
+    engine at full throttle or turn it off. This is the reason why this
+    environment has discrete actions: engine on or off.
+
+    There are two environment versions: discrete or continuous.
+    The landing pad is always at coordinates (0,0). The coordinates are the
+    first two numbers in the state vector.
+    Landing outside of the landing pad is possible. Fuel is infinite, so an agent
+    can learn to fly and then land on its first attempt.
+
+    To see a heuristic landing, run:
+    ```
+    python gymnasium/envs/box2d/lunar_lander.py
+    ```
+    <!-- To play yourself, run: -->
+    <!-- python examples/agents/keyboard_agent.py LunarLander-v2 -->
+
+    ## Action Space
+    There are four discrete actions available:
+    - 0: do nothing
+    - 1: fire left orientation engine
+    - 2: fire main engine
+    - 3: fire right orientation engine
+
+    ## Observation Space
+    The state is an 8-dimensional vector: the coordinates of the lander in `x` & `y`, its linear
+    velocities in `x` & `y`, its angle, its angular velocity, and two booleans
+    that represent whether each leg is in contact with the ground or not.
+
+    ## Rewards
+    After every step a reward is granted. The total reward of an episode is the
+    sum of the rewards for all the steps within that episode.
+
+    For each step, the reward:
+    - is increased/decreased the closer/further the lander is to the landing pad.
+    - is increased/decreased the slower/faster the lander is moving.
+    - is decreased the more the lander is tilted (angle not horizontal).
+    - is increased by 10 points for each leg that is in contact with the ground.
+    - is decreased by 0.03 points each frame a side engine is firing.
+    - is decreased by 0.3 points each frame the main engine is firing.
+
+    The episode receives an additional reward of -100 or +100 points for crashing or landing safely respectively.
+
+    An episode is considered a solution if it scores at least 200 points.
+
+    ## Starting State
+    The lander starts at the top center of the viewport with a random initial
+    force applied to its center of mass.
+
+    ## Episode Termination
+    The episode finishes if:
+    1) the lander crashes (the lander body gets in contact with the moon);
+    2) the lander gets outside of the viewport (`x` coordinate is greater than 1);
+    3) the lander is not awake. From the [Box2D docs](https://box2d.org/documentation/md__d_1__git_hub_box2d_docs_dynamics.html#autotoc_md61),
+        a body which is not awake is a body which doesn't move and doesn't
+        collide with any other body:
+    > When Box2D determines that a body (or group of bodies) has come to rest,
+    > the body enters a sleep state which has very little CPU overhead. If a
+    > body is awake and collides with a sleeping body, then the sleeping body
+    > wakes up. Bodies will also wake up if a joint or contact attached to
+    > them is destroyed.
+
+    ## Arguments
+    To use the _continuous_ environment, you need to specify the
+    `continuous=True` argument like below:
+    ```python
+    import gymnasium as gym
+    env = gym.make(
+        "LunarLander-v2",
+        continuous=False,
+        gravity=-10.0,
+        enable_wind=False,
+        wind_power=15.0,
+        turbulence_power=1.5,
+    )
+    ```
+    If `continuous=True` is passed, continuous actions (corresponding to the throttle of the engines) will be used and the
+    action space will be `Box(-1, +1, (2,), dtype=np.float32)`.
+    The first coordinate of an action determines the throttle of the main engine, while the second
+    coordinate specifies the throttle of the lateral boosters.
+    Given an action `np.array([main, lateral])`, the main engine will be turned off completely if
+    `main < 0` and the throttle scales affinely from 50% to 100% for `0 <= main <= 1` (in particular, the
+    main engine doesn't work  with less than 50% power).
+    Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
+    booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
+    from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively).
+
+    `gravity` dictates the gravitational constant, this is bounded to be within 0 and -12.
+
+    If `enable_wind=True` is passed, there will be wind effects applied to the lander.
+    The wind is generated using the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))`.
+    `k` is set to 0.01.
+    `C` is sampled randomly between -9999 and 9999.
+
+    `wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for `wind_power` is between 0.0 and 20.0.
+    `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft. The recommended value for `turbulence_power` is between 0.0 and 2.0.
+
+    ## Version History
+    - v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters
+    - v1: Legs contact with ground added in state vector; contact with ground
+        give +10 reward points, and -10 if then lose contact; reward
+        renormalized to 200; harder initial random push.
+    - v0: Initial version
+
+
+    ## Notes
+
+    There are several unexpected bugs with the implementation of the environment.
+
+    1. The position of the side thrusters on the body of the lander changes, depending on the orientation of the lander.
+    This in turn results in an orientation-dependent torque being applied to the lander.
+
+    2. The units of the state are not consistent. I.e.
+    * The angular velocity is in units of 0.4 radians per second. In order to convert to radians per second, the value needs to be multiplied by a factor of 2.5.
+
+    For the default values of VIEWPORT_W, VIEWPORT_H, SCALE, and FPS, the scale factors equal:
+    'x': 10
+    'y': 6.666
+    'vx': 5
+    'vy': 7.5
+    'angle': 1
+    'angular velocity': 2.5
+
+    After the correction has been made, the units of the state are as follows:
+    'x': (units)
+    'y': (units)
+    'vx': (units/second)
+    'vy': (units/second)
+    'angle': (radians)
+    'angular velocity': (radians/second)
+
+
+    <!-- ## References -->
+
+    ## Credits
+    Created by Oleg Klimov
+    """
+
+    # Render modes supported by this environment and the frame rate it
+    # advertises (the module-level FPS constant).
+    metadata = {
+        "render_modes": ["human", "rgb_array"],
+        "render_fps": FPS,
+    }
+
+    def __init__(
+        self,
+        render_mode: Optional[str] = None,
+        continuous: bool = False,
+        gravity: float = -10.0,
+        enable_wind: bool = False,
+        wind_power: float = 15.0,
+        turbulence_power: float = 1.5,
+    ):
+        """Configure the environment and build its observation/action spaces.
+
+        Args:
+            render_mode: Stored as ``self.render_mode``.
+            continuous: If true, use a 2-element ``Box(-1, +1)`` action space;
+                otherwise a ``Discrete(4)`` action space.
+            gravity: Vertical gravity of the Box2D world; must lie strictly
+                between -12 and 0.
+            enable_wind: Whether wind and turbulence are applied in ``step``.
+            wind_power: Scale of the linear wind; a warning is issued when it
+                is outside ``[0, 20]``.
+            turbulence_power: Scale of the rotational wind; a warning is issued
+                when it is outside ``[0, 2]``.
+
+        Raises:
+            AssertionError: If ``gravity`` is not strictly between -12 and 0.
+        """
+        # Record the constructor arguments with EzPickle.
+        EzPickle.__init__(
+            self,
+            render_mode,
+            continuous,
+            gravity,
+            enable_wind,
+            wind_power,
+            turbulence_power,
+        )
+
+        assert (
+            -12.0 < gravity and gravity < 0.0
+        ), f"gravity (current value: {gravity}) must be between -12 and 0"
+        self.gravity = gravity
+
+        # Out-of-range wind settings only warn; the value is still used as given.
+        if 0.0 > wind_power or wind_power > 20.0:
+            warnings.warn(
+                colorize(
+                    f"WARN: wind_power value is recommended to be between 0.0 and 20.0, (current value: {wind_power})",
+                    "yellow",
+                ),
+            )
+        self.wind_power = wind_power
+
+        if 0.0 > turbulence_power or turbulence_power > 2.0:
+            warnings.warn(
+                colorize(
+                    f"WARN: turbulence_power value is recommended to be between 0.0 and 2.0, (current value: {turbulence_power})",
+                    "yellow",
+                ),
+            )
+        self.turbulence_power = turbulence_power
+
+        self.enable_wind = enable_wind
+        # Starting offsets of the wind and turbulence functions. They are drawn
+        # from NumPy's global RNG (np.random), not from self.np_random.
+        self.wind_idx = np.random.randint(-9999, 9999)
+        self.torque_idx = np.random.randint(-9999, 9999)
+
+        # Rendering state starts empty here.
+        self.screen: pygame.Surface = None
+        self.clock = None
+        self.isopen = True
+        self.world = Box2D.b2World(gravity=(0, gravity))
+        # Bodies are created in reset(); until then they are None / empty.
+        self.moon = None
+        self.lander: Optional[Box2D.b2Body] = None
+        self.particles = []
+
+        self.prev_reward = None
+
+        self.continuous = continuous
+
+        # Observation bounds, in state-vector order: x, y, vx, vy, angle,
+        # angular velocity, and the two leg-contact flags.
+        low = np.array(
+            [
+                # these are bounds for position
+                # realistically the environment should have ended
+                # long before we reach more than 50% outside
+                -1.5,
+                -1.5,
+                # velocity bounds is 5x rated speed
+                -5.0,
+                -5.0,
+                -math.pi,
+                -5.0,
+                -0.0,
+                -0.0,
+            ]
+        ).astype(np.float32)
+        high = np.array(
+            [
+                # these are bounds for position
+                # realistically the environment should have ended
+                # long before we reach more than 50% outside
+                1.5,
+                1.5,
+                # velocity bounds is 5x rated speed
+                5.0,
+                5.0,
+                math.pi,
+                5.0,
+                1.0,
+                1.0,
+            ]
+        ).astype(np.float32)
+
+        # useful range is -1 .. +1, but spikes can be higher
+        self.observation_space = spaces.Box(low, high)
+
+        if self.continuous:
+            # Action is two floats [main engine, left-right engines].
+            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
+            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
+            self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32)
+        else:
+            # Nop, fire left engine, main engine, right engine
+            self.action_space = spaces.Discrete(4)
+
+        self.render_mode = render_mode
+
+    def _destroy(self):
+        if not self.moon:
+            return
+        self.world.contactListener = None
+        self._clean_particles(True)
+        self.world.DestroyBody(self.moon)
+        self.moon = None
+        self.world.DestroyBody(self.lander)
+        self.lander = None
+        self.world.DestroyBody(self.legs[0])
+        self.world.DestroyBody(self.legs[1])
+
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ):
+        super().reset(seed=seed)
+        self._destroy()
+        self.world.contactListener_keepref = ContactDetector(self)
+        self.world.contactListener = self.world.contactListener_keepref
+        self.game_over = False
+        self.prev_shaping = None
+
+        W = VIEWPORT_W / SCALE
+        H = VIEWPORT_H / SCALE
+
+        # Create Terrain
+        CHUNKS = 11
+        height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1,))
+        chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
+        self.helipad_x1 = chunk_x[CHUNKS // 2 - 1]
+        self.helipad_x2 = chunk_x[CHUNKS // 2 + 1]
+        self.helipad_y = H / 4
+        height[CHUNKS // 2 - 2] = self.helipad_y
+        height[CHUNKS // 2 - 1] = self.helipad_y
+        height[CHUNKS // 2 + 0] = self.helipad_y
+        height[CHUNKS // 2 + 1] = self.helipad_y
+        height[CHUNKS // 2 + 2] = self.helipad_y
+        smooth_y = [
+            0.33 * (height[i - 1] + height[i + 0] + height[i + 1])
+            for i in range(CHUNKS)
+        ]
+
+        self.moon = self.world.CreateStaticBody(
+            shapes=edgeShape(vertices=[(0, 0), (W, 0)])
+        )
+        self.sky_polys = []
+        for i in range(CHUNKS - 1):
+            p1 = (chunk_x[i], smooth_y[i])
+            p2 = (chunk_x[i + 1], smooth_y[i + 1])
+            self.moon.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1)
+            self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])
+
+        self.moon.color1 = (0.0, 0.0, 0.0)
+        self.moon.color2 = (0.0, 0.0, 0.0)
+
+        # Create Lander body
+        initial_y = VIEWPORT_H / SCALE
+        initial_x = VIEWPORT_W / SCALE / 2
+        self.lander: Box2D.b2Body = self.world.CreateDynamicBody(
+            position=(initial_x, initial_y),
+            angle=0.0,
+            fixtures=fixtureDef(
+                shape=polygonShape(
+                    vertices=[(x / SCALE, y / SCALE) for x, y in LANDER_POLY]
+                ),
+                density=5.0,
+                friction=0.1,
+                categoryBits=0x0010,
+                maskBits=0x001,  # collide only with ground
+                restitution=0.0,
+            ),  # 0.99 bouncy
+        )
+        self.lander.color1 = (128, 102, 230)
+        self.lander.color2 = (77, 77, 128)
+
+        # Apply the initial random impulse to the lander
+        self.lander.ApplyForceToCenter(
+            (
+                self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
+                self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
+            ),
+            True,
+        )
+
+        # Create Lander Legs
+        self.legs = []
+        for i in [-1, +1]:
+            leg = self.world.CreateDynamicBody(
+                position=(initial_x - i * LEG_AWAY / SCALE, initial_y),
+                angle=(i * 0.05),
+                fixtures=fixtureDef(
+                    shape=polygonShape(box=(LEG_W / SCALE, LEG_H / SCALE)),
+                    density=1.0,
+                    restitution=0.0,
+                    categoryBits=0x0020,
+                    maskBits=0x001,
+                ),
+            )
+            leg.ground_contact = False
+            leg.color1 = (128, 102, 230)
+            leg.color2 = (77, 77, 128)
+            rjd = revoluteJointDef(
+                bodyA=self.lander,
+                bodyB=leg,
+                localAnchorA=(0, 0),
+                localAnchorB=(i * LEG_AWAY / SCALE, LEG_DOWN / SCALE),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=LEG_SPRING_TORQUE,
+                motorSpeed=+0.3 * i,  # low enough not to jump back into the sky
+            )
+            if i == -1:
+                rjd.lowerAngle = (
+                    +0.9 - 0.5
+                )  # The most esoteric numbers here, angled legs have freedom to travel within
+                rjd.upperAngle = +0.9
+            else:
+                rjd.lowerAngle = -0.9
+                rjd.upperAngle = -0.9 + 0.5
+            leg.joint = self.world.CreateJoint(rjd)
+            self.legs.append(leg)
+
+        self.drawlist = [self.lander] + self.legs
+
+        if self.render_mode == "human":
+            self.render()
+        return self.step(np.array([0, 0]) if self.continuous else 0)[0], {}
+
+    def _create_particle(self, mass, x, y, ttl):
+        p = self.world.CreateDynamicBody(
+            position=(x, y),
+            angle=0.0,
+            fixtures=fixtureDef(
+                shape=circleShape(radius=2 / SCALE, pos=(0, 0)),
+                density=mass,
+                friction=0.1,
+                categoryBits=0x0100,
+                maskBits=0x001,  # collide only with ground
+                restitution=0.3,
+            ),
+        )
+        p.ttl = ttl
+        self.particles.append(p)
+        self._clean_particles(False)
+        return p
+
+    def _clean_particles(self, all):
+        while self.particles and (all or self.particles[0].ttl < 0):
+            self.world.DestroyBody(self.particles.pop(0))
+
+    def step(self, action):
+        assert self.lander is not None
+
+        # Update wind and apply to the lander
+        assert self.lander is not None, "You forgot to call reset()"
+        if self.enable_wind and not (
+            self.legs[0].ground_contact or self.legs[1].ground_contact
+        ):
+            # the function used for wind is tanh(sin(2 k x) + sin(pi k x)),
+            # which is proven to never be periodic, k = 0.01
+            wind_mag = (
+                math.tanh(
+                    math.sin(0.02 * self.wind_idx)
+                    + (math.sin(math.pi * 0.01 * self.wind_idx))
+                )
+                * self.wind_power
+            )
+            self.wind_idx += 1
+            self.lander.ApplyForceToCenter(
+                (wind_mag, 0.0),
+                True,
+            )
+
+            # the function used for torque is tanh(sin(2 k x) + sin(pi k x)),
+            # which is proven to never be periodic, k = 0.01
+            torque_mag = math.tanh(
+                math.sin(0.02 * self.torque_idx)
+                + (math.sin(math.pi * 0.01 * self.torque_idx))
+            ) * (self.turbulence_power)
+            self.torque_idx += 1
+            self.lander.ApplyTorque(
+                (torque_mag),
+                True,
+            )
+
+        if self.continuous:
+            action = np.clip(action, -1, +1).astype(np.float32)
+        else:
+            assert self.action_space.contains(
+                action
+            ), f"{action!r} ({type(action)}) invalid "
+
+        # Apply Engine Impulses
+
+        # Tip is the (X and Y) components of the rotation of the lander.
+        tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
+
+        # Side is the (-Y and X) components of the rotation of the lander.
+        side = (-tip[1], tip[0])
+
+        # Generate two random numbers between -1/SCALE and 1/SCALE.
+        dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
+
+        m_power = 0.0
+        if (self.continuous and action[0] > 0.0) or (
+            not self.continuous and action == 2
+        ):
+            # Main engine
+            if self.continuous:
+                m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5  # 0.5..1.0
+                assert m_power >= 0.5 and m_power <= 1.0
+            else:
+                m_power = 1.0
+
+            # 4 is move a bit downwards, +-2 for randomness
+            # The components of the impulse to be applied by the main engine.
+            ox = (
+                tip[0] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0])
+                + side[0] * dispersion[1]
+            )
+            oy = (
+                -tip[1] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0])
+                - side[1] * dispersion[1]
+            )
+
+            impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
+            if self.render_mode is not None:
+                # particles are just a decoration, with no impact on the physics, so don't add them when not rendering
+                p = self._create_particle(
+                    3.5,  # 3.5 is here to make particle speed adequate
+                    impulse_pos[0],
+                    impulse_pos[1],
+                    m_power,
+                )
+                p.ApplyLinearImpulse(
+                    (
+                        ox * MAIN_ENGINE_POWER * m_power,
+                        oy * MAIN_ENGINE_POWER * m_power,
+                    ),
+                    impulse_pos,
+                    True,
+                )
+            self.lander.ApplyLinearImpulse(
+                (-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
+                impulse_pos,
+                True,
+            )
+
+        s_power = 0.0
+        if (self.continuous and np.abs(action[1]) > 0.5) or (
+            not self.continuous and action in [1, 3]
+        ):
+            # Orientation/Side engines
+            if self.continuous:
+                direction = np.sign(action[1])
+                s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
+                assert s_power >= 0.5 and s_power <= 1.0
+            else:
+                # action = 1 is left, action = 3 is right
+                direction = action - 2
+                s_power = 1.0
+
+            # The components of the impulse to be applied by the side engines.
+            ox = tip[0] * dispersion[0] + side[0] * (
+                3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
+            )
+            oy = -tip[1] * dispersion[0] - side[1] * (
+                3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
+            )
+
+            # The hard-coded 17 below is presumably meant to be SIDE_ENGINE_HEIGHT.
+            # However, SIDE_ENGINE_HEIGHT is defined as 14.
+            # This causes the position of the thrust on the body of the lander to change, depending on the orientation of the lander.
+            # This in turn results in an orientation-dependent torque being applied to the lander.
+            impulse_pos = (
+                self.lander.position[0] + ox - tip[0] * 17 / SCALE,
+                self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
+            )
+            if self.render_mode is not None:
+                # particles are just a decoration, with no impact on the physics, so don't add them when not rendering
+                p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
+                p.ApplyLinearImpulse(
+                    (
+                        ox * SIDE_ENGINE_POWER * s_power,
+                        oy * SIDE_ENGINE_POWER * s_power,
+                    ),
+                    impulse_pos,
+                    True,
+                )
+            self.lander.ApplyLinearImpulse(
+                (-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
+                impulse_pos,
+                True,
+            )
+
+        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
+
+        pos = self.lander.position
+        vel = self.lander.linearVelocity
+
+        state = [
+            (pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
+            (pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
+            vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
+            vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
+            self.lander.angle,
+            20.0 * self.lander.angularVelocity / FPS,
+            1.0 if self.legs[0].ground_contact else 0.0,
+            1.0 if self.legs[1].ground_contact else 0.0,
+        ]
+        assert len(state) == 8
+
+        reward = 0
+        shaping = (
+            -100 * np.sqrt(state[0] * state[0] + state[1] * state[1])
+            - 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])
+            - 100 * abs(state[4])
+            + 10 * state[6]
+            + 10 * state[7]
+        )  # And ten points for legs contact, the idea is if you
+        # lose contact again after landing, you get negative reward
+        if self.prev_shaping is not None:
+            reward = shaping - self.prev_shaping
+        self.prev_shaping = shaping
+
+        reward -= (
+            m_power * 0.30
+        )  # less fuel spent is better, about -30 for heuristic landing
+        reward -= s_power * 0.03
+
+        terminated = False
+        if self.game_over or abs(state[0]) >= 1.0:
+            terminated = True
+            reward = -100
+        if not self.lander.awake:
+            terminated = True
+            reward = +100
+
+        if self.render_mode == "human":
+            self.render()
+        return np.array(state, dtype=np.float32), reward, terminated, False, {}
+
+    def render(self):
+        """Draw the current world state with pygame.
+
+        Returns:
+            ``None`` when ``render_mode`` is ``None`` (a warning is logged
+            instead of drawing) or ``"human"`` (the frame is shown in the
+            window); for ``"rgb_array"``, the pixel array of the frame with
+            its first two axes swapped.
+
+        Raises:
+            DependencyNotInstalled: If pygame cannot be imported.
+        """
+        if self.render_mode is None:
+            assert self.spec is not None
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
+        # pygame is imported lazily so the module can be loaded without it.
+        try:
+            import pygame
+            from pygame import gfxdraw
+        except ImportError as e:
+            raise DependencyNotInstalled(
+                "pygame is not installed, run `pip install gymnasium[box2d]`"
+            ) from e
+
+        # The window is only created in human mode; the clock is created in every mode.
+        if self.screen is None and self.render_mode == "human":
+            pygame.init()
+            pygame.display.init()
+            self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
+        if self.clock is None:
+            self.clock = pygame.time.Clock()
+
+        # A fresh off-screen surface is allocated for every frame.
+        self.surf = pygame.Surface((VIEWPORT_W, VIEWPORT_H))
+
+        # NOTE(review): the result of this call is discarded, so it does not
+        # appear to affect self.surf - confirm whether it can be removed.
+        pygame.transform.scale(self.surf, (SCALE, SCALE))
+        # Paint the whole surface white as the background.
+        pygame.draw.rect(self.surf, (255, 255, 255), self.surf.get_rect())
+
+        # Age every particle and recompute its colours from the remaining ttl.
+        # color1 and color2 are computed with identical expressions.
+        for obj in self.particles:
+            obj.ttl -= 0.15
+            obj.color1 = (
+                int(max(0.2, 0.15 + obj.ttl) * 255),
+                int(max(0.2, 0.5 * obj.ttl) * 255),
+                int(max(0.2, 0.5 * obj.ttl) * 255),
+            )
+            obj.color2 = (
+                int(max(0.2, 0.15 + obj.ttl) * 255),
+                int(max(0.2, 0.5 * obj.ttl) * 255),
+                int(max(0.2, 0.5 * obj.ttl) * 255),
+            )
+
+        # Helper defined elsewhere in the class; presumably drops expired
+        # particles - verify against its definition.
+        self._clean_particles(False)
+
+        # Sky polygons are stored in world units; scale to pixels and fill in black.
+        for p in self.sky_polys:
+            scaled_poly = []
+            for coord in p:
+                scaled_poly.append((coord[0] * SCALE, coord[1] * SCALE))
+            pygame.draw.polygon(self.surf, (0, 0, 0), scaled_poly)
+            gfxdraw.aapolygon(self.surf, scaled_poly, (0, 0, 0))
+
+        # Draw every fixture of every particle and drawlist body, transformed
+        # by its body transform and scaled to pixels.
+        for obj in self.particles + self.drawlist:
+            for f in obj.fixtures:
+                trans = f.body.transform
+                if type(f.shape) is circleShape:
+                    # The circle is drawn twice at the same place, once per colour.
+                    pygame.draw.circle(
+                        self.surf,
+                        color=obj.color1,
+                        center=trans * f.shape.pos * SCALE,
+                        radius=f.shape.radius * SCALE,
+                    )
+                    pygame.draw.circle(
+                        self.surf,
+                        color=obj.color2,
+                        center=trans * f.shape.pos * SCALE,
+                        radius=f.shape.radius * SCALE,
+                    )
+
+                else:
+                    # Any non-circle shape is drawn from its vertices: filled
+                    # in color1, outlined in color2.
+                    path = [trans * v * SCALE for v in f.shape.vertices]
+                    pygame.draw.polygon(self.surf, color=obj.color1, points=path)
+                    gfxdraw.aapolygon(self.surf, path, obj.color1)
+                    pygame.draw.aalines(
+                        self.surf, color=obj.color2, points=path, closed=True
+                    )
+
+                # Helipad flags: a white pole and a yellow triangular flag at
+                # each helipad edge.
+                # NOTE(review): this loop sits inside the fixture loop, so both
+                # flags are redrawn after every fixture; they end up on top of
+                # everything drawn before them.
+                for x in [self.helipad_x1, self.helipad_x2]:
+                    x = x * SCALE
+                    flagy1 = self.helipad_y * SCALE
+                    flagy2 = flagy1 + 50
+                    pygame.draw.line(
+                        self.surf,
+                        color=(255, 255, 255),
+                        start_pos=(x, flagy1),
+                        end_pos=(x, flagy2),
+                        width=1,
+                    )
+                    pygame.draw.polygon(
+                        self.surf,
+                        color=(204, 204, 0),
+                        points=[
+                            (x, flagy2),
+                            (x, flagy2 - 10),
+                            (x + 25, flagy2 - 5),
+                        ],
+                    )
+                    gfxdraw.aapolygon(
+                        self.surf,
+                        [(x, flagy2), (x, flagy2 - 10), (x + 25, flagy2 - 5)],
+                        (204, 204, 0),
+                    )
+
+        # Flip the finished frame (x flag False, y flag True); presumably
+        # because world y grows upwards while screen y grows downwards.
+        self.surf = pygame.transform.flip(self.surf, False, True)
+
+        if self.render_mode == "human":
+            assert self.screen is not None
+            self.screen.blit(self.surf, (0, 0))
+            pygame.event.pump()
+            # Throttle to the frame rate declared in the environment metadata.
+            self.clock.tick(self.metadata["render_fps"])
+            pygame.display.flip()
+        elif self.render_mode == "rgb_array":
+            # Swap the first two axes of pygame's pixel array before returning it.
+            return np.transpose(
+                np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2)
+            )
+
+    def close(self):
+        """Shut down the pygame window, if one was ever opened.
+
+        Does nothing when no window exists (``self.screen`` is ``None``), so
+        it is safe to call repeatedly and in non-human render modes.
+        """
+        if self.screen is None:
+            return
+
+        import pygame
+
+        pygame.display.quit()
+        pygame.quit()
+        # Forget the stale surface: this makes a second close() a no-op and
+        # lets a later render() in human mode re-create the window instead of
+        # drawing onto a surface whose display has been shut down.
+        self.screen = None
+        self.isopen = False
+
+
+def heuristic(env, s):
+    """
+    Hand-tuned controller used for
+    1. Testing
+    2. Demonstration rollout.
+
+    Args:
+        env: The environment; only its ``continuous`` attribute is read.
+        s (list): The observation, indexed as follows:
+            s[0] horizontal coordinate
+            s[1] vertical coordinate
+            s[2] horizontal speed
+            s[3] vertical speed
+            s[4] angle
+            s[5] angular speed
+            s[6] 1 if first leg has contact, else 0
+            s[7] 1 if second leg has contact, else 0
+
+    Returns:
+         a: The action to feed into ``env.step``. When ``env.continuous`` is
+         true this is a 2-element array clipped to [-1, +1]; otherwise it is
+         one of the integers 0, 1, 2 or 3.
+    """
+    x_pos, y_pos = s[0], s[1]
+    x_vel, y_vel = s[2], s[3]
+    angle, angle_vel = s[4], s[5]
+
+    # Lean towards the centre, but never by more than 0.4 radians (22 degrees).
+    angle_targ = max(min(x_pos * 0.5 + x_vel * 1.0, 0.4), -0.4)
+    # Target height is proportional to the horizontal offset.
+    hover_targ = 0.55 * np.abs(x_pos)
+
+    if s[6] or s[7]:
+        # A leg is touching: stop steering and only reduce the fall speed.
+        angle_todo = 0
+        hover_todo = -(y_vel) * 0.5
+    else:
+        angle_todo = (angle_targ - angle) * 0.5 - (angle_vel) * 1.0
+        hover_todo = (hover_targ - y_pos) * 0.5 - (y_vel) * 0.5
+
+    if env.continuous:
+        return np.clip(np.array([hover_todo * 20 - 1, -angle_todo * 20]), -1, +1)
+
+    # Discrete actions: the main engine wins when the hover error dominates,
+    # otherwise one of the side engines fires according to the sign of the
+    # angle error.
+    if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
+        return 2
+    if angle_todo < -0.05:
+        return 3
+    if angle_todo > +0.05:
+        return 1
+    return 0
+
+
+def demo_heuristic_lander(env, seed=None, render=False):
+    """Run one episode driven by ``heuristic`` and return the summed reward.
+
+    Args:
+        env: The environment to roll out in.
+        seed: Seed forwarded to ``env.reset``.
+        render: When true, ``env.render()`` is called after every step and
+            ``env.close()`` once the loop ends.
+
+    Returns:
+        The total reward accumulated over the episode.
+    """
+    obs, info = env.reset(seed=seed)
+    total_reward = 0
+    steps = 0
+    done = False
+    while not done:
+        action = heuristic(env, obs)
+        obs, reward, terminated, truncated, info = step_api_compatibility(
+            env.step(action), True
+        )
+        total_reward += reward
+        done = terminated or truncated
+
+        # A render call that returns exactly False ends the rollout early.
+        if render and env.render() is False:
+            break
+
+        # Report progress every 20 steps and on the final step.
+        if steps % 20 == 0 or done:
+            print("observations:", " ".join([f"{x:+0.2f}" for x in obs]))
+            print(f"step {steps} total_reward {total_reward:+0.2f}")
+        steps += 1
+    if render:
+        env.close()
+    return total_reward
+
+
+class LunarLanderContinuous:
+    """Placeholder that refuses direct construction and points to ``gym.make``."""
+
+    def __init__(self):
+        # Direct instantiation is unsupported; always raise with instructions.
+        message = (
+            "Error initializing LunarLanderContinuous Environment.\n"
+            "Currently, we do not support initializing this mode of environment by calling the class directly.\n"
+            "To use this environment, instead create it by specifying the continuous keyword in gym.make, i.e.\n"
+            'gym.make("LunarLander-v2", continuous=True)'
+        )
+        raise error.Error(message)
+
+
+if __name__ == "__main__":
+    # Script entry point: build the environment in rgb_array mode and run one
+    # heuristic-driven episode, calling render() after every step.
+    env = gym.make("LunarLander-v2", render_mode="rgb_array")
+    demo_heuristic_lander(env, render=True)