862 lines
31 KiB
Python
862 lines
31 KiB
Python
__credits__ = ["Andrea PIERRÉ"]
|
|
|
|
import math
|
|
from typing import TYPE_CHECKING, List, Optional
|
|
|
|
import numpy as np
|
|
|
|
import gymnasium as gym
|
|
from gymnasium import error, spaces
|
|
from gymnasium.error import DependencyNotInstalled
|
|
from gymnasium.utils import EzPickle
|
|
|
|
|
|
try:
|
|
import Box2D
|
|
from Box2D.b2 import (
|
|
circleShape,
|
|
contactListener,
|
|
edgeShape,
|
|
fixtureDef,
|
|
polygonShape,
|
|
revoluteJointDef,
|
|
)
|
|
except ImportError as e:
|
|
raise DependencyNotInstalled(
|
|
"Box2D is not installed, run `pip install gymnasium[box2d]`"
|
|
) from e
|
|
|
|
|
|
if TYPE_CHECKING:
|
|
import pygame
|
|
|
|
FPS = 50
|
|
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
|
|
|
|
MOTORS_TORQUE = 80
|
|
SPEED_HIP = 4
|
|
SPEED_KNEE = 6
|
|
LIDAR_RANGE = 160 / SCALE
|
|
|
|
INITIAL_RANDOM = 5
|
|
|
|
HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)]
|
|
LEG_DOWN = -8 / SCALE
|
|
LEG_W, LEG_H = 8 / SCALE, 34 / SCALE
|
|
|
|
VIEWPORT_W = 600
|
|
VIEWPORT_H = 400
|
|
|
|
TERRAIN_STEP = 14 / SCALE
|
|
TERRAIN_LENGTH = 200 # in steps
|
|
TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4
|
|
TERRAIN_GRASS = 10 # low long are grass spots, in steps
|
|
TERRAIN_STARTPAD = 20 # in steps
|
|
FRICTION = 2.5
|
|
|
|
HULL_FD = fixtureDef(
|
|
shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]),
|
|
density=5.0,
|
|
friction=0.1,
|
|
categoryBits=0x0020,
|
|
maskBits=0x001, # collide only with ground
|
|
restitution=0.0,
|
|
) # 0.99 bouncy
|
|
|
|
LEG_FD = fixtureDef(
|
|
shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)),
|
|
density=1.0,
|
|
restitution=0.0,
|
|
categoryBits=0x0020,
|
|
maskBits=0x001,
|
|
)
|
|
|
|
LOWER_FD = fixtureDef(
|
|
shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)),
|
|
density=1.0,
|
|
restitution=0.0,
|
|
categoryBits=0x0020,
|
|
maskBits=0x001,
|
|
)
|
|
|
|
|
|
class ContactDetector(contactListener):
|
|
def __init__(self, env):
|
|
contactListener.__init__(self)
|
|
self.env = env
|
|
|
|
def BeginContact(self, contact):
|
|
if (
|
|
self.env.hull == contact.fixtureA.body
|
|
or self.env.hull == contact.fixtureB.body
|
|
):
|
|
self.env.game_over = True
|
|
for leg in [self.env.legs[1], self.env.legs[3]]:
|
|
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
|
|
leg.ground_contact = True
|
|
|
|
def EndContact(self, contact):
|
|
for leg in [self.env.legs[1], self.env.legs[3]]:
|
|
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
|
|
leg.ground_contact = False
|
|
|
|
|
|
class BipedalWalker(gym.Env, EzPickle):
|
|
"""
|
|
## Description
|
|
This is a simple 4-joint walker robot environment.
|
|
There are two versions:
|
|
- Normal, with slightly uneven terrain.
|
|
- Hardcore, with ladders, stumps, pitfalls.
|
|
|
|
To solve the normal version, you need to get 300 points in 1600 time steps.
|
|
To solve the hardcore version, you need 300 points in 2000 time steps.
|
|
|
|
A heuristic is provided for testing. It's also useful to get demonstrations
|
|
to learn from. To run the heuristic:
|
|
```
|
|
python gymnasium/envs/box2d/bipedal_walker.py
|
|
```
|
|
|
|
## Action Space
|
|
Actions are motor speed values in the [-1, 1] range for each of the
|
|
4 joints at both hips and knees.
|
|
|
|
## Observation Space
|
|
State consists of hull angle speed, angular velocity, horizontal speed,
|
|
vertical speed, position of joints and joints angular speed, legs contact
|
|
with ground, and 10 lidar rangefinder measurements. There are no coordinates
|
|
in the state vector.
|
|
|
|
## Rewards
|
|
Reward is given for moving forward, totaling 300+ points up to the far end.
|
|
If the robot falls, it gets -100. Applying motor torque costs a small
|
|
amount of points. A more optimal agent will get a better score.
|
|
|
|
## Starting State
|
|
The walker starts standing at the left end of the terrain with the hull
|
|
horizontal, and both legs in the same position with a slight knee angle.
|
|
|
|
## Episode Termination
|
|
The episode will terminate if the hull gets in contact with the ground or
|
|
if the walker exceeds the right end of the terrain length.
|
|
|
|
## Arguments
|
|
To use to the _hardcore_ environment, you need to specify the
|
|
`hardcore=True` argument like below:
|
|
```python
|
|
import gymnasium as gym
|
|
env = gym.make("BipedalWalker-v3", hardcore=True)
|
|
```
|
|
|
|
## Version History
|
|
- v3: Returns the closest lidar trace instead of furthest;
|
|
faster video recording
|
|
- v2: Count energy spent
|
|
- v1: Legs now report contact with ground; motors have higher torque and
|
|
speed; ground has higher friction; lidar rendered less nervously.
|
|
- v0: Initial version
|
|
|
|
|
|
<!-- ## References -->
|
|
|
|
## Credits
|
|
Created by Oleg Klimov
|
|
|
|
"""
|
|
|
|
metadata = {
|
|
"render_modes": ["human", "rgb_array"],
|
|
"render_fps": FPS,
|
|
}
|
|
|
|
def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False):
|
|
EzPickle.__init__(self, render_mode, hardcore)
|
|
self.isopen = True
|
|
|
|
self.world = Box2D.b2World()
|
|
self.terrain: List[Box2D.b2Body] = []
|
|
self.hull: Optional[Box2D.b2Body] = None
|
|
|
|
self.prev_shaping = None
|
|
|
|
self.hardcore = hardcore
|
|
|
|
self.fd_polygon = fixtureDef(
|
|
shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
|
|
friction=FRICTION,
|
|
)
|
|
|
|
self.fd_edge = fixtureDef(
|
|
shape=edgeShape(vertices=[(0, 0), (1, 1)]),
|
|
friction=FRICTION,
|
|
categoryBits=0x0001,
|
|
)
|
|
|
|
# we use 5.0 to represent the joints moving at maximum
|
|
# 5 x the rated speed due to impulses from ground contact etc.
|
|
low = np.array(
|
|
[
|
|
-math.pi,
|
|
-5.0,
|
|
-5.0,
|
|
-5.0,
|
|
-math.pi,
|
|
-5.0,
|
|
-math.pi,
|
|
-5.0,
|
|
-0.0,
|
|
-math.pi,
|
|
-5.0,
|
|
-math.pi,
|
|
-5.0,
|
|
-0.0,
|
|
]
|
|
+ [-1.0] * 10
|
|
).astype(np.float32)
|
|
high = np.array(
|
|
[
|
|
math.pi,
|
|
5.0,
|
|
5.0,
|
|
5.0,
|
|
math.pi,
|
|
5.0,
|
|
math.pi,
|
|
5.0,
|
|
5.0,
|
|
math.pi,
|
|
5.0,
|
|
math.pi,
|
|
5.0,
|
|
5.0,
|
|
]
|
|
+ [1.0] * 10
|
|
).astype(np.float32)
|
|
self.action_space = spaces.Box(
|
|
np.array([-1, -1, -1, -1]).astype(np.float32),
|
|
np.array([1, 1, 1, 1]).astype(np.float32),
|
|
)
|
|
self.observation_space = spaces.Box(low, high)
|
|
|
|
# state = [
|
|
# self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
|
|
# 2.0 * self.hull.angularVelocity / FPS,
|
|
# 0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range
|
|
# 0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
|
|
# self.joints[
|
|
# 0
|
|
# ].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
|
|
# self.joints[0].speed / SPEED_HIP,
|
|
# self.joints[1].angle + 1.0,
|
|
# self.joints[1].speed / SPEED_KNEE,
|
|
# 1.0 if self.legs[1].ground_contact else 0.0,
|
|
# self.joints[2].angle,
|
|
# self.joints[2].speed / SPEED_HIP,
|
|
# self.joints[3].angle + 1.0,
|
|
# self.joints[3].speed / SPEED_KNEE,
|
|
# 1.0 if self.legs[3].ground_contact else 0.0,
|
|
# ]
|
|
# state += [l.fraction for l in self.lidar]
|
|
|
|
self.render_mode = render_mode
|
|
self.screen: Optional[pygame.Surface] = None
|
|
self.clock = None
|
|
|
|
def _destroy(self):
|
|
if not self.terrain:
|
|
return
|
|
self.world.contactListener = None
|
|
for t in self.terrain:
|
|
self.world.DestroyBody(t)
|
|
self.terrain = []
|
|
self.world.DestroyBody(self.hull)
|
|
self.hull = None
|
|
for leg in self.legs:
|
|
self.world.DestroyBody(leg)
|
|
self.legs = []
|
|
self.joints = []
|
|
|
|
def _generate_terrain(self, hardcore):
|
|
GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
|
|
state = GRASS
|
|
velocity = 0.0
|
|
y = TERRAIN_HEIGHT
|
|
counter = TERRAIN_STARTPAD
|
|
oneshot = False
|
|
self.terrain = []
|
|
self.terrain_x = []
|
|
self.terrain_y = []
|
|
|
|
stair_steps, stair_width, stair_height = 0, 0, 0
|
|
original_y = 0
|
|
for i in range(TERRAIN_LENGTH):
|
|
x = i * TERRAIN_STEP
|
|
self.terrain_x.append(x)
|
|
|
|
if state == GRASS and not oneshot:
|
|
velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y)
|
|
if i > TERRAIN_STARTPAD:
|
|
velocity += self.np_random.uniform(-1, 1) / SCALE # 1
|
|
y += velocity
|
|
|
|
elif state == PIT and oneshot:
|
|
counter = self.np_random.integers(3, 5)
|
|
poly = [
|
|
(x, y),
|
|
(x + TERRAIN_STEP, y),
|
|
(x + TERRAIN_STEP, y - 4 * TERRAIN_STEP),
|
|
(x, y - 4 * TERRAIN_STEP),
|
|
]
|
|
self.fd_polygon.shape.vertices = poly
|
|
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
|
|
t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
|
|
self.terrain.append(t)
|
|
|
|
self.fd_polygon.shape.vertices = [
|
|
(p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
|
|
]
|
|
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
|
|
t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
|
|
self.terrain.append(t)
|
|
counter += 2
|
|
original_y = y
|
|
|
|
elif state == PIT and not oneshot:
|
|
y = original_y
|
|
if counter > 1:
|
|
y -= 4 * TERRAIN_STEP
|
|
|
|
elif state == STUMP and oneshot:
|
|
counter = self.np_random.integers(1, 3)
|
|
poly = [
|
|
(x, y),
|
|
(x + counter * TERRAIN_STEP, y),
|
|
(x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP),
|
|
(x, y + counter * TERRAIN_STEP),
|
|
]
|
|
self.fd_polygon.shape.vertices = poly
|
|
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
|
|
t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
|
|
self.terrain.append(t)
|
|
|
|
elif state == STAIRS and oneshot:
|
|
stair_height = +1 if self.np_random.random() > 0.5 else -1
|
|
stair_width = self.np_random.integers(4, 5)
|
|
stair_steps = self.np_random.integers(3, 5)
|
|
original_y = y
|
|
for s in range(stair_steps):
|
|
poly = [
|
|
(
|
|
x + (s * stair_width) * TERRAIN_STEP,
|
|
y + (s * stair_height) * TERRAIN_STEP,
|
|
),
|
|
(
|
|
x + ((1 + s) * stair_width) * TERRAIN_STEP,
|
|
y + (s * stair_height) * TERRAIN_STEP,
|
|
),
|
|
(
|
|
x + ((1 + s) * stair_width) * TERRAIN_STEP,
|
|
y + (-1 + s * stair_height) * TERRAIN_STEP,
|
|
),
|
|
(
|
|
x + (s * stair_width) * TERRAIN_STEP,
|
|
y + (-1 + s * stair_height) * TERRAIN_STEP,
|
|
),
|
|
]
|
|
self.fd_polygon.shape.vertices = poly
|
|
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
|
|
t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
|
|
self.terrain.append(t)
|
|
counter = stair_steps * stair_width
|
|
|
|
elif state == STAIRS and not oneshot:
|
|
s = stair_steps * stair_width - counter - stair_height
|
|
n = s / stair_width
|
|
y = original_y + (n * stair_height) * TERRAIN_STEP
|
|
|
|
oneshot = False
|
|
self.terrain_y.append(y)
|
|
counter -= 1
|
|
if counter == 0:
|
|
counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS)
|
|
if state == GRASS and hardcore:
|
|
state = self.np_random.integers(1, _STATES_)
|
|
oneshot = True
|
|
else:
|
|
state = GRASS
|
|
oneshot = True
|
|
|
|
self.terrain_poly = []
|
|
for i in range(TERRAIN_LENGTH - 1):
|
|
poly = [
|
|
(self.terrain_x[i], self.terrain_y[i]),
|
|
(self.terrain_x[i + 1], self.terrain_y[i + 1]),
|
|
]
|
|
self.fd_edge.shape.vertices = poly
|
|
t = self.world.CreateStaticBody(fixtures=self.fd_edge)
|
|
color = (76, 255 if i % 2 == 0 else 204, 76)
|
|
t.color1 = color
|
|
t.color2 = color
|
|
self.terrain.append(t)
|
|
color = (102, 153, 76)
|
|
poly += [(poly[1][0], 0), (poly[0][0], 0)]
|
|
self.terrain_poly.append((poly, color))
|
|
self.terrain.reverse()
|
|
|
|
def _generate_clouds(self):
|
|
# Sorry for the clouds, couldn't resist
|
|
self.cloud_poly = []
|
|
for i in range(TERRAIN_LENGTH // 20):
|
|
x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
|
|
y = VIEWPORT_H / SCALE * 3 / 4
|
|
poly = [
|
|
(
|
|
x
|
|
+ 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
|
|
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
|
|
y
|
|
+ 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
|
|
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
|
|
)
|
|
for a in range(5)
|
|
]
|
|
x1 = min(p[0] for p in poly)
|
|
x2 = max(p[0] for p in poly)
|
|
self.cloud_poly.append((poly, x1, x2))
|
|
|
|
def reset(
|
|
self,
|
|
*,
|
|
seed: Optional[int] = None,
|
|
options: Optional[dict] = None,
|
|
):
|
|
super().reset(seed=seed)
|
|
self._destroy()
|
|
self.world.contactListener_bug_workaround = ContactDetector(self)
|
|
self.world.contactListener = self.world.contactListener_bug_workaround
|
|
self.game_over = False
|
|
self.prev_shaping = None
|
|
self.scroll = 0.0
|
|
self.lidar_render = 0
|
|
|
|
self._generate_terrain(self.hardcore)
|
|
self._generate_clouds()
|
|
|
|
init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
|
|
init_y = TERRAIN_HEIGHT + 2 * LEG_H
|
|
self.hull = self.world.CreateDynamicBody(
|
|
position=(init_x, init_y), fixtures=HULL_FD
|
|
)
|
|
self.hull.color1 = (127, 51, 229)
|
|
self.hull.color2 = (76, 76, 127)
|
|
self.hull.ApplyForceToCenter(
|
|
(self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
|
|
)
|
|
|
|
self.legs: List[Box2D.b2Body] = []
|
|
self.joints: List[Box2D.b2RevoluteJoint] = []
|
|
for i in [-1, +1]:
|
|
leg = self.world.CreateDynamicBody(
|
|
position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
|
|
angle=(i * 0.05),
|
|
fixtures=LEG_FD,
|
|
)
|
|
leg.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
|
|
leg.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
|
|
rjd = revoluteJointDef(
|
|
bodyA=self.hull,
|
|
bodyB=leg,
|
|
localAnchorA=(0, LEG_DOWN),
|
|
localAnchorB=(0, LEG_H / 2),
|
|
enableMotor=True,
|
|
enableLimit=True,
|
|
maxMotorTorque=MOTORS_TORQUE,
|
|
motorSpeed=i,
|
|
lowerAngle=-0.8,
|
|
upperAngle=1.1,
|
|
)
|
|
self.legs.append(leg)
|
|
self.joints.append(self.world.CreateJoint(rjd))
|
|
|
|
lower = self.world.CreateDynamicBody(
|
|
position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
|
|
angle=(i * 0.05),
|
|
fixtures=LOWER_FD,
|
|
)
|
|
lower.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
|
|
lower.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
|
|
rjd = revoluteJointDef(
|
|
bodyA=leg,
|
|
bodyB=lower,
|
|
localAnchorA=(0, -LEG_H / 2),
|
|
localAnchorB=(0, LEG_H / 2),
|
|
enableMotor=True,
|
|
enableLimit=True,
|
|
maxMotorTorque=MOTORS_TORQUE,
|
|
motorSpeed=1,
|
|
lowerAngle=-1.6,
|
|
upperAngle=-0.1,
|
|
)
|
|
lower.ground_contact = False
|
|
self.legs.append(lower)
|
|
self.joints.append(self.world.CreateJoint(rjd))
|
|
|
|
self.drawlist = self.terrain + self.legs + [self.hull]
|
|
|
|
class LidarCallback(Box2D.b2.rayCastCallback):
|
|
def ReportFixture(self, fixture, point, normal, fraction):
|
|
if (fixture.filterData.categoryBits & 1) == 0:
|
|
return -1
|
|
self.p2 = point
|
|
self.fraction = fraction
|
|
return fraction
|
|
|
|
self.lidar = [LidarCallback() for _ in range(10)]
|
|
if self.render_mode == "human":
|
|
self.render()
|
|
return self.step(np.array([0, 0, 0, 0]))[0], {}
|
|
|
|
def step(self, action: np.ndarray):
|
|
assert self.hull is not None
|
|
|
|
# self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
|
|
control_speed = False # Should be easier as well
|
|
if control_speed:
|
|
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
|
|
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
|
|
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
|
|
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
|
|
else:
|
|
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
|
|
self.joints[0].maxMotorTorque = float(
|
|
MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
|
|
)
|
|
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
|
|
self.joints[1].maxMotorTorque = float(
|
|
MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
|
|
)
|
|
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
|
|
self.joints[2].maxMotorTorque = float(
|
|
MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
|
|
)
|
|
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
|
|
self.joints[3].maxMotorTorque = float(
|
|
MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
|
|
)
|
|
|
|
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
|
|
|
|
pos = self.hull.position
|
|
vel = self.hull.linearVelocity
|
|
|
|
for i in range(10):
|
|
self.lidar[i].fraction = 1.0
|
|
self.lidar[i].p1 = pos
|
|
self.lidar[i].p2 = (
|
|
pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE,
|
|
pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE,
|
|
)
|
|
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
|
|
|
|
state = [
|
|
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
|
|
2.0 * self.hull.angularVelocity / FPS,
|
|
0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range
|
|
0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
|
|
self.joints[0].angle,
|
|
# This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
|
|
self.joints[0].speed / SPEED_HIP,
|
|
self.joints[1].angle + 1.0,
|
|
self.joints[1].speed / SPEED_KNEE,
|
|
1.0 if self.legs[1].ground_contact else 0.0,
|
|
self.joints[2].angle,
|
|
self.joints[2].speed / SPEED_HIP,
|
|
self.joints[3].angle + 1.0,
|
|
self.joints[3].speed / SPEED_KNEE,
|
|
1.0 if self.legs[3].ground_contact else 0.0,
|
|
]
|
|
state += [l.fraction for l in self.lidar]
|
|
assert len(state) == 24
|
|
|
|
self.scroll = pos.x - VIEWPORT_W / SCALE / 5
|
|
|
|
shaping = (
|
|
130 * pos[0] / SCALE
|
|
) # moving forward is a way to receive reward (normalized to get 300 on completion)
|
|
shaping -= 5.0 * abs(
|
|
state[0]
|
|
) # keep head straight, other than that and falling, any behavior is unpunished
|
|
|
|
reward = 0
|
|
if self.prev_shaping is not None:
|
|
reward = shaping - self.prev_shaping
|
|
self.prev_shaping = shaping
|
|
|
|
for a in action:
|
|
reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
|
|
# normalized to about -50.0 using heuristic, more optimal agent should spend less
|
|
|
|
terminated = False
|
|
if self.game_over or pos[0] < 0:
|
|
reward = -100
|
|
terminated = True
|
|
if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
|
|
terminated = True
|
|
|
|
if self.render_mode == "human":
|
|
self.render()
|
|
return np.array(state, dtype=np.float32), reward, terminated, False, {}
|
|
|
|
def render(self):
|
|
if self.render_mode is None:
|
|
assert self.spec is not None
|
|
gym.logger.warn(
|
|
"You are calling render method without specifying any render mode. "
|
|
"You can specify the render_mode at initialization, "
|
|
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
|
|
)
|
|
return
|
|
|
|
try:
|
|
import pygame
|
|
from pygame import gfxdraw
|
|
except ImportError as e:
|
|
raise DependencyNotInstalled(
|
|
"pygame is not installed, run `pip install gymnasium[box2d]`"
|
|
) from e
|
|
|
|
if self.screen is None and self.render_mode == "human":
|
|
pygame.init()
|
|
pygame.display.init()
|
|
self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
|
|
if self.clock is None:
|
|
self.clock = pygame.time.Clock()
|
|
|
|
self.surf = pygame.Surface(
|
|
(VIEWPORT_W + max(0.0, self.scroll) * SCALE, VIEWPORT_H)
|
|
)
|
|
|
|
pygame.transform.scale(self.surf, (SCALE, SCALE))
|
|
|
|
pygame.draw.polygon(
|
|
self.surf,
|
|
color=(215, 215, 255),
|
|
points=[
|
|
(self.scroll * SCALE, 0),
|
|
(self.scroll * SCALE + VIEWPORT_W, 0),
|
|
(self.scroll * SCALE + VIEWPORT_W, VIEWPORT_H),
|
|
(self.scroll * SCALE, VIEWPORT_H),
|
|
],
|
|
)
|
|
|
|
for poly, x1, x2 in self.cloud_poly:
|
|
if x2 < self.scroll / 2:
|
|
continue
|
|
if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
|
|
continue
|
|
pygame.draw.polygon(
|
|
self.surf,
|
|
color=(255, 255, 255),
|
|
points=[
|
|
(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly
|
|
],
|
|
)
|
|
gfxdraw.aapolygon(
|
|
self.surf,
|
|
[(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly],
|
|
(255, 255, 255),
|
|
)
|
|
for poly, color in self.terrain_poly:
|
|
if poly[1][0] < self.scroll:
|
|
continue
|
|
if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
|
|
continue
|
|
scaled_poly = []
|
|
for coord in poly:
|
|
scaled_poly.append([coord[0] * SCALE, coord[1] * SCALE])
|
|
pygame.draw.polygon(self.surf, color=color, points=scaled_poly)
|
|
gfxdraw.aapolygon(self.surf, scaled_poly, color)
|
|
|
|
self.lidar_render = (self.lidar_render + 1) % 100
|
|
i = self.lidar_render
|
|
if i < 2 * len(self.lidar):
|
|
single_lidar = (
|
|
self.lidar[i]
|
|
if i < len(self.lidar)
|
|
else self.lidar[len(self.lidar) - i - 1]
|
|
)
|
|
if hasattr(single_lidar, "p1") and hasattr(single_lidar, "p2"):
|
|
pygame.draw.line(
|
|
self.surf,
|
|
color=(255, 0, 0),
|
|
start_pos=(single_lidar.p1[0] * SCALE, single_lidar.p1[1] * SCALE),
|
|
end_pos=(single_lidar.p2[0] * SCALE, single_lidar.p2[1] * SCALE),
|
|
width=1,
|
|
)
|
|
|
|
for obj in self.drawlist:
|
|
for f in obj.fixtures:
|
|
trans = f.body.transform
|
|
if type(f.shape) is circleShape:
|
|
pygame.draw.circle(
|
|
self.surf,
|
|
color=obj.color1,
|
|
center=trans * f.shape.pos * SCALE,
|
|
radius=f.shape.radius * SCALE,
|
|
)
|
|
pygame.draw.circle(
|
|
self.surf,
|
|
color=obj.color2,
|
|
center=trans * f.shape.pos * SCALE,
|
|
radius=f.shape.radius * SCALE,
|
|
)
|
|
else:
|
|
path = [trans * v * SCALE for v in f.shape.vertices]
|
|
if len(path) > 2:
|
|
pygame.draw.polygon(self.surf, color=obj.color1, points=path)
|
|
gfxdraw.aapolygon(self.surf, path, obj.color1)
|
|
path.append(path[0])
|
|
pygame.draw.polygon(
|
|
self.surf, color=obj.color2, points=path, width=1
|
|
)
|
|
gfxdraw.aapolygon(self.surf, path, obj.color2)
|
|
else:
|
|
pygame.draw.aaline(
|
|
self.surf,
|
|
start_pos=path[0],
|
|
end_pos=path[1],
|
|
color=obj.color1,
|
|
)
|
|
|
|
flagy1 = TERRAIN_HEIGHT * SCALE
|
|
flagy2 = flagy1 + 50
|
|
x = TERRAIN_STEP * 3 * SCALE
|
|
pygame.draw.aaline(
|
|
self.surf, color=(0, 0, 0), start_pos=(x, flagy1), end_pos=(x, flagy2)
|
|
)
|
|
f = [
|
|
(x, flagy2),
|
|
(x, flagy2 - 10),
|
|
(x + 25, flagy2 - 5),
|
|
]
|
|
pygame.draw.polygon(self.surf, color=(230, 51, 0), points=f)
|
|
pygame.draw.lines(
|
|
self.surf, color=(0, 0, 0), points=f + [f[0]], width=1, closed=False
|
|
)
|
|
|
|
self.surf = pygame.transform.flip(self.surf, False, True)
|
|
|
|
if self.render_mode == "human":
|
|
assert self.screen is not None
|
|
self.screen.blit(self.surf, (-self.scroll * SCALE, 0))
|
|
pygame.event.pump()
|
|
self.clock.tick(self.metadata["render_fps"])
|
|
pygame.display.flip()
|
|
elif self.render_mode == "rgb_array":
|
|
return np.transpose(
|
|
np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2)
|
|
)[:, -VIEWPORT_W:]
|
|
|
|
def close(self):
|
|
if self.screen is not None:
|
|
import pygame
|
|
|
|
pygame.display.quit()
|
|
pygame.quit()
|
|
self.isopen = False
|
|
|
|
|
|
class BipedalWalkerHardcore:
|
|
def __init__(self):
|
|
raise error.Error(
|
|
"Error initializing BipedalWalkerHardcore Environment.\n"
|
|
"Currently, we do not support initializing this mode of environment by calling the class directly.\n"
|
|
"To use this environment, instead create it by specifying the hardcore keyword in gym.make, i.e.\n"
|
|
'gym.make("BipedalWalker-v3", hardcore=True)'
|
|
)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# Heurisic: suboptimal, have no notion of balance.
|
|
env = BipedalWalker()
|
|
env.reset()
|
|
steps = 0
|
|
total_reward = 0
|
|
a = np.array([0.0, 0.0, 0.0, 0.0])
|
|
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3
|
|
SPEED = 0.29 # Will fall forward on higher speed
|
|
state = STAY_ON_ONE_LEG
|
|
moving_leg = 0
|
|
supporting_leg = 1 - moving_leg
|
|
SUPPORT_KNEE_ANGLE = +0.1
|
|
supporting_knee_angle = SUPPORT_KNEE_ANGLE
|
|
while True:
|
|
s, r, terminated, truncated, info = env.step(a)
|
|
total_reward += r
|
|
if steps % 20 == 0 or terminated or truncated:
|
|
print("\naction " + str([f"{x:+0.2f}" for x in a]))
|
|
print(f"step {steps} total_reward {total_reward:+0.2f}")
|
|
print("hull " + str([f"{x:+0.2f}" for x in s[0:4]]))
|
|
print("leg0 " + str([f"{x:+0.2f}" for x in s[4:9]]))
|
|
print("leg1 " + str([f"{x:+0.2f}" for x in s[9:14]]))
|
|
steps += 1
|
|
|
|
contact0 = s[8]
|
|
contact1 = s[13]
|
|
moving_s_base = 4 + 5 * moving_leg
|
|
supporting_s_base = 4 + 5 * supporting_leg
|
|
|
|
hip_targ = [None, None] # -0.8 .. +1.1
|
|
knee_targ = [None, None] # -0.6 .. +0.9
|
|
hip_todo = [0.0, 0.0]
|
|
knee_todo = [0.0, 0.0]
|
|
|
|
if state == STAY_ON_ONE_LEG:
|
|
hip_targ[moving_leg] = 1.1
|
|
knee_targ[moving_leg] = -0.6
|
|
supporting_knee_angle += 0.03
|
|
if s[2] > SPEED:
|
|
supporting_knee_angle += 0.03
|
|
supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE)
|
|
knee_targ[supporting_leg] = supporting_knee_angle
|
|
if s[supporting_s_base + 0] < 0.10: # supporting leg is behind
|
|
state = PUT_OTHER_DOWN
|
|
if state == PUT_OTHER_DOWN:
|
|
hip_targ[moving_leg] = +0.1
|
|
knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
|
|
knee_targ[supporting_leg] = supporting_knee_angle
|
|
if s[moving_s_base + 4]:
|
|
state = PUSH_OFF
|
|
supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE)
|
|
if state == PUSH_OFF:
|
|
knee_targ[moving_leg] = supporting_knee_angle
|
|
knee_targ[supporting_leg] = +1.0
|
|
if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED:
|
|
state = STAY_ON_ONE_LEG
|
|
moving_leg = 1 - moving_leg
|
|
supporting_leg = 1 - moving_leg
|
|
|
|
if hip_targ[0]:
|
|
hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5]
|
|
if hip_targ[1]:
|
|
hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10]
|
|
if knee_targ[0]:
|
|
knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7]
|
|
if knee_targ[1]:
|
|
knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12]
|
|
|
|
hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1] # PID to keep head strait
|
|
hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1]
|
|
knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations
|
|
knee_todo[1] -= 15.0 * s[3]
|
|
|
|
a[0] = hip_todo[0]
|
|
a[1] = knee_todo[0]
|
|
a[2] = hip_todo[1]
|
|
a[3] = knee_todo[1]
|
|
a = np.clip(0.5 * a, -1.0, 1.0)
|
|
|
|
if terminated or truncated:
|
|
break
|