I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,15 @@
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv # isort:skip
# ^^^^^ so that user gets the correct error
# message if mujoco is not installed correctly
from gymnasium.envs.mujoco.ant import AntEnv
from gymnasium.envs.mujoco.half_cheetah import HalfCheetahEnv
from gymnasium.envs.mujoco.hopper import HopperEnv
from gymnasium.envs.mujoco.humanoid import HumanoidEnv
from gymnasium.envs.mujoco.humanoidstandup import HumanoidStandupEnv
from gymnasium.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
from gymnasium.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
from gymnasium.envs.mujoco.pusher import PusherEnv
from gymnasium.envs.mujoco.reacher import ReacherEnv
from gymnasium.envs.mujoco.swimmer import SwimmerEnv
from gymnasium.envs.mujoco.walker2d import Walker2dEnv

View File

@ -0,0 +1,80 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class AntEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, "ant.xml", 5, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore) / self.dt
ctrl_cost = 0.5 * np.square(a).sum()
contact_cost = (
0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
)
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
not_terminated = (
np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
)
terminated = not not_terminated
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return (
ob,
reward,
terminated,
False,
dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward,
),
)
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.1, high=0.1
)
qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.distance = self.model.stat.extent * 0.5

View File

@ -0,0 +1,187 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class AntEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
xml_file="ant.xml",
ctrl_cost_weight=0.5,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
ctrl_cost_weight,
contact_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
contact_force_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._contact_force_range = contact_force_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(113,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def contact_forces(self):
raw_contact_forces = self.sim.data.cfrc_ext
min_value, max_value = self._contact_force_range
contact_forces = np.clip(raw_contact_forces, min_value, max_value)
return contact_forces
@property
def contact_cost(self):
contact_cost = self._contact_cost_weight * np.sum(
np.square(self.contact_forces)
)
return contact_cost
@property
def is_healthy(self):
state = self.state_vector()
min_z, max_z = self._healthy_z_range
is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def step(self, action):
xy_position_before = self.get_body_com("torso")[:2].copy()
self.do_simulation(action, self.frame_skip)
xy_position_after = self.get_body_com("torso")[:2].copy()
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
ctrl_cost = self.control_cost(action)
contact_cost = self.contact_cost
forward_reward = x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost + contact_cost
reward = rewards - costs
terminated = self.terminated
observation = self._get_obs()
info = {
"reward_forward": forward_reward,
"reward_ctrl": -ctrl_cost,
"reward_contact": -contact_cost,
"reward_survive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
contact_force = self.contact_forces.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
observations = np.concatenate((position, velocity, contact_force))
return observations
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,379 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class AntEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the environment introduced by Schulman,
Moritz, Levine, Jordan and Abbeel in ["High-Dimensional Continuous Control
Using Generalized Advantage Estimation"](https://arxiv.org/abs/1506.02438).
The ant is a 3D robot consisting of one torso (free rotational body) with
four legs attached to it with each leg having two body parts. The goal is to
coordinate the four legs to move in the forward (right) direction by applying
torques on the eight hinges connecting the two body parts of each leg and the torso
(nine body parts and eight hinges).
## Action Space
The action space is a `Box(-1, 1, (8,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ----------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
| 0 | Torque applied on the rotor between the torso and back right hip | -1 | 1 | hip_4 (right_back_leg) | hinge | torque (N m) |
| 1 | Torque applied on the rotor between the back right two links | -1 | 1 | angle_4 (right_back_leg) | hinge | torque (N m) |
| 2 | Torque applied on the rotor between the torso and front left hip | -1 | 1 | hip_1 (front_left_leg) | hinge | torque (N m) |
| 3 | Torque applied on the rotor between the front left two links | -1 | 1 | angle_1 (front_left_leg) | hinge | torque (N m) |
| 4 | Torque applied on the rotor between the torso and front right hip | -1 | 1 | hip_2 (front_right_leg) | hinge | torque (N m) |
| 5 | Torque applied on the rotor between the front right two links | -1 | 1 | angle_2 (front_right_leg) | hinge | torque (N m) |
| 6 | Torque applied on the rotor between the torso and back left hip | -1 | 1 | hip_3 (back_leg) | hinge | torque (N m) |
| 7 | Torque applied on the rotor between the back left two links | -1 | 1 | angle_3 (back_leg) | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the ant,
followed by the velocities of those individual parts (their derivatives) with all
the positions ordered before all the velocities.
By default, observations do not include the x- and y-coordinates of the ant's torso. These may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be a `Box(-Inf, Inf, (29,), float64)` where the first two observations
represent the x- and y- coordinates of the ant's torso.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
of the torso will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
However, by default, observation Space is a `Box(-Inf, Inf, (27,), float64)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
|-----|--------------------------------------------------------------|--------|--------|----------------------------------------|-------|--------------------------|
| 0 | z-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) |
| 1 | x-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
| 2 | y-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
| 3 | z-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
| 4 | w-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
| 5 | angle between torso and first link on front left | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) |
| 6 | angle between the two links on the front left | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) |
| 7 | angle between torso and first link on front right | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) |
| 8 | angle between the two links on the front right | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) |
| 9 | angle between torso and first link on back left | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) |
| 10 | angle between the two links on the back left | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) |
| 11 | angle between torso and first link on back right | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) |
| 12 | angle between the two links on the back right | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) |
| 13 | x-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) |
| 14 | y-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) |
| 15 | z-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) |
| 16 | x-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) |
| 17 | y-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) |
| 18 | z-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) |
| 19 | angular velocity of angle between torso and front left link | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) |
| 20 | angular velocity of the angle between front left links | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) |
| 21 | angular velocity of angle between torso and front right link | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) |
| 22 | angular velocity of the angle between front right links | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) |
| 23 | angular velocity of angle between torso and back left link | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) |
| 24 | angular velocity of the angle between back left links | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) |
| 25 | angular velocity of angle between torso and back right link | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) |
| 26 | angular velocity of the angle between back right links | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) |
| excluded | x-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) |
| excluded | y-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) |
If version < `v4` or `use_contact_forces` is `True` then the observation space is extended by 14*6 = 84 elements, which are contact forces
(external forces - force x, y, z and torque x, y, z) applied to the
center of mass of each of the body parts. The 14 body parts are:
| id (for `v2`, `v3`, `v4)` | body parts |
| --- | ------------ |
| 0 | worldbody (note: forces are always full of zeros) |
| 1 | torso |
| 2 | front_left_leg |
| 3 | aux_1 (front left leg) |
| 4 | ankle_1 (front left leg) |
| 5 | front_right_leg |
| 6 | aux_2 (front right leg) |
| 7 | ankle_2 (front right leg) |
| 8 | back_leg (back left leg) |
| 9 | aux_3 (back left leg) |
| 10 | ankle_3 (back left leg) |
| 11 | right_back_leg |
| 12 | aux_4 (back right leg) |
| 13 | ankle_4 (back right leg) |
The (x,y,z) coordinates are translational DOFs while the orientations are rotational
DOFs expressed as quaternions. One can read more about free joints on the [Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
**Note:** Ant-v4 environment no longer has the following contact forces issue.
If using previous Humanoid versions from v4, there have been reported issues that using a Mujoco-Py version > 2.0 results
in the contact forces always being 0. As such we recommend to use a Mujoco-Py version < 2.0
when using the Ant environment if you would like to report results with contact forces (if
contact forces are not used in your experiments, you can use version > 2.0).
## Rewards
The reward consists of three parts:
- *healthy_reward*: Every timestep that the ant is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward`
- *forward_reward*: A reward of moving forward which is measured as
*(x-coordinate before action - x-coordinate after action)/dt*. *dt* is the time
between actions and is dependent on the `frame_skip` parameter (default is 5),
where the frametime is 0.01 - making the default *dt = 5 * 0.01 = 0.05*.
This reward would be positive if the ant moves forward (in positive x direction).
- *ctrl_cost*: A negative reward for penalising the ant if it takes actions
that are too large. It is measured as *`ctrl_cost_weight` * sum(action<sup>2</sup>)*
where *`ctr_cost_weight`* is a parameter set for the control and has a default value of 0.5.
- *contact_cost*: A negative reward for penalising the ant if the external contact
force is too large. It is calculated *`contact_cost_weight` * sum(clip(external contact
force to `contact_force_range`)<sup>2</sup>)*.
The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost*.
But if `use_contact_forces=True` or version < `v4`
The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost*.
In either case `info` will also contain the individual reward terms.
## Starting State
All observations start in state
(0.0, 0.0, 0.75, 1.0, 0.0 ... 0.0) with a uniform noise in the range
of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional values and standard normal noise
with mean 0 and standard deviation `reset_noise_scale` added to the velocity values for
stochasticity. Note that the initial z coordinate is intentionally selected
to be slightly high, thereby indicating a standing up ant. The initial orientation
is designed to make it face forward as well.
## Episode End
The ant is said to be unhealthy if any of the following happens:
1. Any of the state space values is no longer finite
2. The z-coordinate of the torso is **not** in the closed interval given by `healthy_z_range` (defaults to [0.2, 1.0])
If `terminate_when_unhealthy=True` is passed during construction (which is the default),
the episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 1000 timesteps
2. Termination: The ant is unhealthy
If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
env = gym.make('Ant-v2')
```
v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('Ant-v4', ctrl_cost_weight=0.1, ...)
```
| Parameter | Type | Default |Description |
|-------------------------|------------|--------------|-------------------------------|
| `xml_file` | **str** | `"ant.xml"` | Path to a MuJoCo model |
| `ctrl_cost_weight` | **float** | `0.5` | Weight for *ctrl_cost* term (see section on reward) |
| `use_contact_forces` | **bool** | `False` | If true, it extends the observation space by adding contact forces (see `Observation Space` section) and includes contact_cost to the reward function (see `Rewards` section) |
| `contact_cost_weight` | **float** | `5e-4` | Weight for *contact_cost* term (see section on reward) |
| `healthy_reward` | **float** | `1` | Constant reward given if the ant is "healthy" after timestep |
| `terminate_when_unhealthy` | **bool**| `True` | If true, issue a done signal if the z-coordinate of the torso is no longer in the `healthy_z_range` |
| `healthy_z_range` | **tuple** | `(0.2, 1)` | The ant is considered healthy if the z-coordinate of the torso is in this range |
| `contact_force_range` | **tuple** | `(-1, 1)` | Contact forces are clipped to this range in the computation of *contact_cost* |
| `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation`| **bool** | `True`| Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3, also removed contact forces from the default observation space (new variable `use_contact_forces=True` can restore them)
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
xml_file="ant.xml",
ctrl_cost_weight=0.5,
use_contact_forces=False,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
ctrl_cost_weight,
use_contact_forces,
contact_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
contact_force_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._contact_force_range = contact_force_range
self._reset_noise_scale = reset_noise_scale
self._use_contact_forces = use_contact_forces
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
obs_shape = 27
if not exclude_current_positions_from_observation:
obs_shape += 2
if use_contact_forces:
obs_shape += 84
observation_space = Box(
low=-np.inf, high=np.inf, shape=(obs_shape,), dtype=np.float64
)
MujocoEnv.__init__(
self,
xml_file,
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def contact_forces(self):
raw_contact_forces = self.data.cfrc_ext
min_value, max_value = self._contact_force_range
contact_forces = np.clip(raw_contact_forces, min_value, max_value)
return contact_forces
@property
def contact_cost(self):
contact_cost = self._contact_cost_weight * np.sum(
np.square(self.contact_forces)
)
return contact_cost
@property
def is_healthy(self):
state = self.state_vector()
min_z, max_z = self._healthy_z_range
is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def step(self, action):
xy_position_before = self.get_body_com("torso")[:2].copy()
self.do_simulation(action, self.frame_skip)
xy_position_after = self.get_body_com("torso")[:2].copy()
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
forward_reward = x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost = self.control_cost(action)
terminated = self.terminated
observation = self._get_obs()
info = {
"reward_forward": forward_reward,
"reward_ctrl": -ctrl_cost,
"reward_survive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self._use_contact_forces:
contact_cost = self.contact_cost
costs += contact_cost
info["reward_ctrl"] = -contact_cost
reward = rewards - costs
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = self.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
if self._use_contact_forces:
contact_force = self.contact_forces.flat.copy()
return np.concatenate((position, velocity, contact_force))
else:
return np.concatenate((position, velocity))
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation

View File

@ -0,0 +1,81 @@
<mujoco model="ant">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.01"/>
<custom>
<numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
</custom>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0.75">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
<joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
<body name="front_left_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
<body name="aux_1" pos="0.2 0.2 0">
<joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 0.2 0">
<joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="front_right_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
<body name="aux_2" pos="-0.2 0.2 0">
<joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 0.2 0">
<joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
<body name="aux_3" pos="-0.2 -0.2 0">
<joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 -0.2 0">
<joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="right_back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
<body name="aux_4" pos="0.2 -0.2 0">
<joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 -0.2 0">
<joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,96 @@
<!-- Cheetah Model
The state space is populated with joints in the order that they are
defined in this file. The actuators also operate on joints.
State-Space (name/joint/parameter):
- rootx slider position (m)
- rootz slider position (m)
- rooty hinge angle (rad)
- bthigh hinge angle (rad)
- bshin hinge angle (rad)
- bfoot hinge angle (rad)
- fthigh hinge angle (rad)
- fshin hinge angle (rad)
- ffoot hinge angle (rad)
- rootx slider velocity (m/s)
- rootz slider velocity (m/s)
- rooty hinge angular velocity (rad/s)
- bthigh hinge angular velocity (rad/s)
- bshin hinge angular velocity (rad/s)
- bfoot hinge angular velocity (rad/s)
- fthigh hinge angular velocity (rad/s)
- fshin hinge angular velocity (rad/s)
- ffoot hinge angular velocity (rad/s)
Actuators (name/actuator/parameter):
- bthigh hinge torque (N m)
- bshin hinge torque (N m)
- bfoot hinge torque (N m)
- fthigh hinge torque (N m)
- fshin hinge torque (N m)
- ffoot hinge torque (N m)
-->
<mujoco model="cheetah">
<compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
<default>
<joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
<geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
<motor ctrllimited="true" ctrlrange="-1 1"/>
</default>
<size nstack="300000" nuser_geom="1"/>
<option gravity="0 0 -9.81" timestep="0.01"/>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 .7">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
<geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
<geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
<!-- <site name='tip' pos='.15 0 .11'/>-->
<body name="bthigh" pos="-.5 0 0">
<joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
<geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
<body name="bshin" pos=".16 0 -.25">
<joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
<body name="bfoot" pos="-.28 0 -.14">
<joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
</body>
</body>
</body>
<body name="fthigh" pos=".5 0 0">
<joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
<body name="fshin" pos="-.14 0 -.24">
<joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
<body name="ffoot" pos=".13 0 -.18">
<joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor gear="120" joint="bthigh" name="bthigh"/>
<motor gear="90" joint="bshin" name="bshin"/>
<motor gear="60" joint="bfoot" name="bfoot"/>
<motor gear="120" joint="fthigh" name="fthigh"/>
<motor gear="60" joint="fshin" name="fshin"/>
<motor gear="30" joint="ffoot" name="ffoot"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,53 @@
<!--
Hopper model for `Hopper-v5`, based on openai/gym/Walker2d
modified by @saran_t
- To not require `coordinate="global"`
-->
<mujoco model="hopper">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<visual>
<map znear="0.02"/>
</visual>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 -0.25" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 -1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 -1.25" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
<geom friction="0.9" name="torso_geom" size="0.05 0.19999999999999996" type="capsule"/>
<body name="thigh" pos="0 0 -0.19999999999999996">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 0" range="-150 0" type="hinge"/>
<geom friction="0.9" pos="0 0 -0.22500000000000009" name="thigh_geom" size="0.05 0.22500000000000003" type="capsule"/>
<body name="leg" pos="0 0 -0.70000000000000007">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.25" range="-150 0" type="hinge"/>
<geom friction="0.9" name="leg_geom" size="0.04 0.25" type="capsule"/>
<body name="foot" pos="0.13 0 -0.35">
<joint axis="0 -1 0" name="foot_joint" pos="-0.13 0 0.1" range="-45 45" type="hinge"/>
<geom friction="2.0" pos="-0.065 0 0.1" quat="0.70710678118654757 0 -0.70710678118654746 0" name="foot_geom" size="0.06 0.195" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,121 @@
<mujoco model="humanoid">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
<!-- <flags solverstat="enable" energy="enable"/>-->
</option>
<size nkey="5" nuser_geom="1"/>
<visual>
<map fogend="5" fogstart="3"/>
</visual>
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
<body name="torso" pos="0 0 1.4">
<camera name="track" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
<geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258"/>
<geom fromto="-.01 -.06 -.12 -.01 .06 -.12" name="uwaist" size="0.06" type="capsule"/>
<body name="lwaist" pos="-.01 0 -0.260" quat="1.000 0 -0.002 0">
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
<body name="pelvis" pos="0 0 -0.165" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
<body name="right_thigh" pos="0 -0.1 -0.04">
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule"/>
<body name="right_shin" pos="0 0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="right_shin1" size="0.049" type="capsule"/>
<body name="right_foot" pos="0 0 -0.45">
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
</body>
</body>
</body>
<body name="left_thigh" pos="0 0.1 -0.04">
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 -0.01 -.34" name="left_thigh1" size="0.06" type="capsule"/>
<body name="left_shin" pos="0 -0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="left_shin1" size="0.049" type="capsule"/>
<body name="left_foot" pos="0 0 -0.45">
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
<camera pos="0 0 0"/>
</body>
</body>
<body name="left_upper_arm" pos="0 0.17 0.06">
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
</body>
</body>
</body>
</worldbody>
<tendon>
<fixed name="left_hipknee">
<joint coef="-1" joint="left_hip_y"/>
<joint coef="1" joint="left_knee"/>
</fixed>
<fixed name="right_hipknee">
<joint coef="-1" joint="right_hip_y"/>
<joint coef="1" joint="right_knee"/>
</fixed>
</tendon>
<actuator>
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
<motor gear="200" joint="right_knee" name="right_knee"/>
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
<motor gear="200" joint="left_knee" name="left_knee"/>
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
<motor gear="25" joint="right_elbow" name="right_elbow"/>
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
<motor gear="25" joint="left_elbow" name="left_elbow"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,121 @@
<mujoco model="humanoidstandup">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
<!-- <flags solverstat="enable" energy="enable"/>-->
</option>
<size nkey="5" nuser_geom="1"/>
<visual>
<map fogend="5" fogstart="3"/>
</visual>
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
<body name="torso" pos="0 0 .105">
<camera name="track" mode="trackcom" pos="0 -3 .5" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
<geom name="head" pos="-.15 0 0" size=".09" type="sphere" user="258"/>
<geom fromto=".11 -.06 0 .11 .06 0" name="uwaist" size="0.06" type="capsule"/>
<body name="lwaist" pos=".21 0 0" quat="1.000 0 -0.002 0">
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
<body name="pelvis" pos="0.165 0 0" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
<body name="right_thigh" pos="0 -0.1 0">
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0.34 0.01 0" name="right_thigh1" size="0.06" type="capsule"/>
<body name="right_shin" pos="0.403 0.01 0">
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
<geom fromto="0 0 0 0.3 0 0" name="right_shin1" size="0.049" type="capsule"/>
<body name="right_foot" pos="0.35 0 -.10">
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
</body>
</body>
</body>
<body name="left_thigh" pos="0 0.1 0">
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0.34 -0.01 0" name="left_thigh1" size="0.06" type="capsule"/>
<body name="left_shin" pos="0.403 -0.01 0">
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 0.3 0 0" name="left_shin1" size="0.049" type="capsule"/>
<body name="left_foot" pos="0.35 0 -.1">
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
<camera pos="0 0 0"/>
</body>
</body>
<body name="left_upper_arm" pos="0 0.17 0.06">
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
</body>
</body>
</body>
</worldbody>
<tendon>
<fixed name="left_hipknee">
<joint coef="-1" joint="left_hip_y"/>
<joint coef="1" joint="left_knee"/>
</fixed>
<fixed name="right_hipknee">
<joint coef="-1" joint="right_hip_y"/>
<joint coef="1" joint="right_knee"/>
</fixed>
</tendon>
<actuator>
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
<motor gear="200" joint="right_knee" name="right_knee"/>
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
<motor gear="200" joint="left_knee" name="left_knee"/>
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
<motor gear="25" joint="right_elbow" name="right_elbow"/>
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
<motor gear="25" joint="left_elbow" name="left_elbow"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,47 @@
<!-- Cartpole Model
The state space is populated with joints in the order that they are
defined in this file. The actuators also operate on joints.
State-Space (name/joint/parameter):
- cart slider position (m)
- pole hinge angle (rad)
- cart slider velocity (m/s)
- pole hinge angular velocity (rad/s)
Actuators (name/actuator/parameter):
- cart motor force x (N)
-->
<mujoco model="cartpole">
<compiler coordinate="local" inertiafromgeom="true"/>
<custom>
<numeric data="2" name="frame_skip"/>
</custom>
<default>
<joint damping="0.05"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
</default>
<option gravity="1e-5 0 -9.81" integrator="RK4" timestep="0.01"/>
<size nstack="3000"/>
<worldbody>
<geom name="floor" pos="0 0 -3.0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
<body name="cart" pos="0 0 0">
<joint axis="1 0 0" limited="true" margin="0.01" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
<body name="pole" pos="0 0 0">
<joint axis="0 1 0" name="hinge" pos="0 0 0" type="hinge"/>
<geom fromto="0 0 0 0 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
<body name="pole2" pos="0 0 0.6">
<joint axis="0 1 0" name="hinge2" pos="0 0 0" type="hinge"/>
<geom fromto="0 0 0 0 0 0.6" name="cpole2" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
<site name="tip" pos="0 0 .6" size="0.01 0.01"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1 1" gear="500" joint="slider" name="slide"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,27 @@
<mujoco model="inverted pendulum">
<compiler inertiafromgeom="true"/>
<default>
<joint armature="0" damping="1" limited="true"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
<tendon/>
<motor ctrlrange="-3 3"/>
</default>
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.02"/>
<size nstack="3000"/>
<worldbody>
<!--geom name="ground" type="plane" pos="0 0 0" /-->
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
<body name="cart" pos="0 0 0">
<joint axis="1 0 0" limited="true" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
<body name="pole" pos="0 0 0">
<joint axis="0 1 0" name="hinge" pos="0 0 0" range="-90 90" type="hinge"/>
<geom fromto="0 0 0 0.001 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.049 0.3" type="capsule"/>
<!-- <body name="pole2" pos="0.001 0 0.6"><joint name="hinge2" type="hinge" pos="0 0 0" axis="0 1 0"/><geom name="cpole2" type="capsule" fromto="0 0 0 0 0 0.6" size="0.05 0.3" rgba="0.7 0 0.7 1"/><site name="tip2" pos="0 0 .6"/></body>-->
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-3 3" gear="100" joint="slider" name="slide"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,31 @@
<mujoco>
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.02"/>
<default>
<joint armature="0" damping="0" limited="false"/>
<geom conaffinity="0" condim="3" density="100" friction="1 0.5 0.5" margin="0" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0">
<geom name="pointbody" pos="0 0 0.5" size="0.5" type="sphere"/>
<geom name="pointarrow" pos="0.6 0 0.5" size="0.5 0.1 0.1" type="box"/>
<joint axis="1 0 0" name="ballx" pos="0 0 0" type="slide"/>
<joint axis="0 1 0" name="bally" pos="0 0 0" type="slide"/>
<joint axis="0 0 1" limited="false" name="rot" pos="0 0 0" type="hinge"/>
</body>
</worldbody>
<actuator>
<!-- Those are just dummy actuators for providing ranges -->
<motor ctrllimited="true" ctrlrange="-1 1" joint="ballx"/>
<motor ctrllimited="true" ctrlrange="-0.25 0.25" joint="rot"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,91 @@
<mujoco model="arm3d">
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
<option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler" />
<default>
<joint armature='0.04' damping="1" limited="true"/>
<geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
</default>
<worldbody>
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
<geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" />
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" />
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" />
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" />
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
<body name="r_shoulder_lift_link" pos="0.1 0 0">
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
<body name="r_upper_arm_roll_link" pos="0 0 0">
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
<body name="r_upper_arm_link" pos="0 0 0">
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
<body name="r_elbow_flex_link" pos="0.4 0 0">
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
<body name="r_forearm_roll_link" pos="0 0 0">
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
<body name="r_forearm_link" pos="0 0 0">
<geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
<body name="r_wrist_flex_link" pos="0.321 0 0">
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
<body name="r_wrist_roll_link" pos="0 0 0">
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
<body name="tips_arm" pos="0 0 0">
<geom name="tip_arml" type="sphere" pos="0.1 -0.1 0." size="0.01" />
<geom name="tip_armr" type="sphere" pos="0.1 0.1 0." size="0.01" />
</body>
<geom type="capsule" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" contype="1" conaffinity="1" />
<geom type="capsule" fromto="0 -0.1 0. 0.1 -0.1 0" size="0.02" contype="1" conaffinity="1" />
<geom type="capsule" fromto="0 +0.1 0. 0.1 +0.1 0." size="0.02" contype="1" conaffinity="1" />
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<!--<body name="object" pos="0.55 -0.3 -0.275" >-->
<body name="object" pos="0.45 -0.05 -0.275" >
<geom rgba="1 1 1 0" type="sphere" size="0.05 0.05 0.05" density="0.00001" conaffinity="0"/>
<geom rgba="1 1 1 1" type="cylinder" size="0.05 0.05 0.05" density="0.00001" contype="1" conaffinity="0"/>
<joint name="obj_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="obj_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
<body name="goal" pos="0.45 -0.05 -0.3230">
<geom rgba="1 0 0 1" type="cylinder" size="0.08 0.001 0.1" density='0.00001' contype="0" conaffinity="0"/>
<joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
</worldbody>
<actuator>
<motor joint="r_shoulder_pan_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_shoulder_lift_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_upper_arm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_elbow_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_forearm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_wrist_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_wrist_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,39 @@
<mujoco model="reacher">
<compiler angle="radian" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
</default>
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.01"/>
<worldbody>
<!-- Arena -->
<geom conaffinity="0" contype="0" name="ground" pos="0 0 0" rgba="0.9 0.9 0.9 1" size="1 1 10" type="plane"/>
<geom conaffinity="0" fromto="-.3 -.3 .01 .3 -.3 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto=" .3 -.3 .01 .3 .3 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto="-.3 .3 .01 .3 .3 .01" name="sideN" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto="-.3 -.3 .01 -.3 .3 .01" name="sideW" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<!-- Arm -->
<geom conaffinity="0" contype="0" fromto="0 0 0 0 0 0.02" name="root" rgba="0.9 0.4 0.6 1" size=".011" type="cylinder"/>
<body name="body0" pos="0 0 .01">
<geom fromto="0 0 0 0.1 0 0" name="link0" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
<joint axis="0 0 1" limited="false" name="joint0" pos="0 0 0" type="hinge"/>
<body name="body1" pos="0.1 0 0">
<joint axis="0 0 1" limited="true" name="joint1" pos="0 0 0" range="-3.0 3.0" type="hinge"/>
<geom fromto="0 0 0 0.1 0 0" name="link1" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
<body name="fingertip" pos="0.11 0 0">
<geom contype="0" name="fingertip" pos="0 0 0" rgba="0.0 0.8 0.6 1" size=".01" type="sphere"/>
</body>
</body>
</body>
<!-- Target -->
<body name="target" pos=".1 -.1 .01">
<joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>
<geom conaffinity="0" contype="0" name="target" pos="0 0 0" rgba="0.9 0.2 0.2 1" size=".009" type="sphere"/>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint0"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint1"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,39 @@
<mujoco model="swimmer">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1"/>
<default>
<geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1"/>
<joint armature='0.1' />
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane"/>
<!-- ================= SWIMMER ================= /-->
<body name="torso" pos="0 0 0">
<camera name="track" mode="trackcom" pos="0 -3 3" xyaxes="1 0 0 0 1 1"/>
<geom density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule"/>
<joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide"/>
<joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide"/>
<joint axis="0 0 1" name="free_body_rot" pos="0 0 0" type="hinge"/>
<body name="mid" pos="0.5 0 0">
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
<joint axis="0 0 1" limited="true" name="motor1_rot" pos="0 0 0" range="-100 100" type="hinge"/>
<body name="back" pos="-1 0 0">
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
<joint axis="0 0 1" limited="true" name="motor2_rot" pos="0 0 0" range="-100 100" type="hinge"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="motor1_rot"/>
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="motor2_rot"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,68 @@
<!--
Walker2D model for `Walker2d-v5`, based on openai/gym/Walker2d
modified by @kallinteris-Andreas
- To not require `coordinate="global"`
- keep feet friction to 0.9/1.9
-->
<mujoco model="walker2d">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="0.01" damping=".1" limited="true"/>
<geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 -0.25" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 -1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 -1.25" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
<geom friction="0.9" name="torso_geom" size="0.050000000000000003 0.19999999999999996" type="capsule"/>
<body name="thigh" pos="0 0 -0.19999999999999996">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 0" range="-150 0" type="hinge"/>
<geom friction="0.9" pos="0 0 -0.22500000000000009" name="thigh_geom" size="0.050000000000000003 0.22500000000000003" type="capsule"/>
<body name="leg" pos="0 0 -0.70000000000000007">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.25" range="-150 0" type="hinge"/>
<geom friction="0.9" name="leg_geom" size="0.040000000000000001 0.25" type="capsule"/>
<body name="foot" pos="0.20000000000000001 0 -0.34999999999999998">
<joint axis="0 -1 0" name="foot_joint" pos="-0.20000000000000001 0 0.10000000000000001" range="-45 45" type="hinge"/>
<geom friction="0.9" pos="-0.10000000000000001 0 0.10000000000000001" quat="0.70710678118654757 0 -0.70710678118654746 0" name="foot_geom" size="0.059999999999999998 0.10000000000000001" type="capsule"/>
</body>
</body>
</body>
<!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_right -->
<body name="thigh_left" pos="0 0 -0.19999999999999996">
<joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 0" range="-150 0" type="hinge"/>
<geom friction="0.9" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.050000000000000003 0.22500000000000003" pos="0 0 -0.22500000000000009" type="capsule"/>
<body name="leg_left" pos="0 0 -0.70000000000000007">
<joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.25" range="-150 0" type="hinge"/>
<geom friction="0.9" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.040000000000000001 0.25" type="capsule"/>
<body name="foot_left" pos="0.20000000000000001 0 -0.34999999999999998">
<joint axis="0 -1 0" name="foot_left_joint" pos="-0.20000000000000001 0 0.10000000000000001" range="-45 45" type="hinge"/>
<geom friction="1.9" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.059999999999999998 0.10000000000000001" pos="-0.10000000000000001 0 0.10000000000000001" type="capsule" quat="0.70710678118654757 0 -0.70710678118654746 0"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
<!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,64 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "half_cheetah.xml", 5, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, action):
xposbefore = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
xposafter = self.sim.data.qpos[0]
ob = self._get_obs()
reward_ctrl = -0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore) / self.dt
reward = reward_ctrl + reward_run
terminated = False
if self.render_mode == "human":
self.render()
return (
ob,
reward,
terminated,
False,
dict(reward_run=reward_run, reward_ctrl=reward_ctrl),
)
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos.flat[1:],
self.sim.data.qvel.flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
low=-0.1, high=0.1, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.distance = self.model.stat.extent * 0.5

View File

@ -0,0 +1,128 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
xml_file="half_cheetah.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
def step(self, action):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
observation = self._get_obs()
reward = forward_reward - ctrl_cost
terminated = False
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
"reward_run": forward_reward,
"reward_ctrl": -ctrl_cost,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,244 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the work by P. Wawrzyński in
["A Cat-Like Robot Real-Time Learning to Run"](http://staff.elka.pw.edu.pl/~pwawrzyn/pub-s/0812_LSCLRR.pdf).
The HalfCheetah is a 2-dimensional robot consisting of 9 body parts and 8
joints connecting them (including two paws). The goal is to apply a torque
on the joints to make the cheetah run forward (right) as fast as possible,
with a positive reward allocated based on the distance moved forward and a
negative reward allocated for moving backward. The torso and head of the
cheetah are fixed, and the torque can only be applied on the other 6 joints
over the front and back thighs (connecting to the torso), shins
(connecting to the thighs) and feet (connecting to the shins).
## Action Space
The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
| --- | --------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
| 0 | Torque applied on the back thigh rotor | -1 | 1 | bthigh | hinge | torque (N m) |
| 1 | Torque applied on the back shin rotor | -1 | 1 | bshin | hinge | torque (N m) |
| 2 | Torque applied on the back foot rotor | -1 | 1 | bfoot | hinge | torque (N m) |
| 3 | Torque applied on the front thigh rotor | -1 | 1 | fthigh | hinge | torque (N m) |
| 4 | Torque applied on the front shin rotor | -1 | 1 | fshin | hinge | torque (N m) |
| 5 | Torque applied on the front foot rotor | -1 | 1 | ffoot | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the
cheetah, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities.
By default, observations do not include the cheetah's `rootx`. It may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be a `Box(-Inf, Inf, (18,), float64)` where the first element
represents the `rootx`.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the
will be returned in `info` with key `"x_position"`.
However, by default, the observation is a `Box(-Inf, Inf, (17,), float64)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
| 0 | z-coordinate of the front tip | -Inf | Inf | rootz | slide | position (m) |
| 1 | angle of the front tip | -Inf | Inf | rooty | hinge | angle (rad) |
| 2 | angle of the second rotor | -Inf | Inf | bthigh | hinge | angle (rad) |
| 3 | angle of the second rotor | -Inf | Inf | bshin | hinge | angle (rad) |
| 4 | velocity of the tip along the x-axis | -Inf | Inf | bfoot | hinge | angle (rad) |
| 5 | velocity of the tip along the y-axis | -Inf | Inf | fthigh | hinge | angle (rad) |
| 6 | angular velocity of front tip | -Inf | Inf | fshin | hinge | angle (rad) |
| 7 | angular velocity of second rotor | -Inf | Inf | ffoot | hinge | angle (rad) |
| 8 | x-coordinate of the front tip | -Inf | Inf | rootx | slide | velocity (m/s) |
| 9 | y-coordinate of the front tip | -Inf | Inf | rootz | slide | velocity (m/s) |
| 10 | angle of the front tip | -Inf | Inf | rooty | hinge | angular velocity (rad/s) |
| 11 | angle of the second rotor | -Inf | Inf | bthigh | hinge | angular velocity (rad/s) |
| 12 | angle of the second rotor | -Inf | Inf | bshin | hinge | angular velocity (rad/s) |
| 13 | velocity of the tip along the x-axis | -Inf | Inf | bfoot | hinge | angular velocity (rad/s) |
| 14 | velocity of the tip along the y-axis | -Inf | Inf | fthigh | hinge | angular velocity (rad/s) |
| 15 | angular velocity of front tip | -Inf | Inf | fshin | hinge | angular velocity (rad/s) |
| 16 | angular velocity of second rotor | -Inf | Inf | ffoot | hinge | angular velocity (rad/s) |
| excluded | x-coordinate of the front tip | -Inf | Inf | rootx | slide | position (m) |
## Rewards
The reward consists of two parts:
- *forward_reward*: A reward of moving forward which is measured
as *`forward_reward_weight` * (x-coordinate before action - x-coordinate after action)/dt*. *dt* is
the time between actions and is dependent on the frame_skip parameter
(fixed to 5), where the frametime is 0.01 - making the
default *dt = 5 * 0.01 = 0.05*. This reward would be positive if the cheetah
runs forward (right).
- *ctrl_cost*: A cost for penalising the cheetah if it takes
actions that are too large. It is measured as *`ctrl_cost_weight` *
sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
control and has a default value of 0.1
The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
## Starting State
All observations start in state (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,) with a noise added to the
initial state for stochasticity. As seen before, the first 8 values in the
state are positional and the last 9 values are velocity. A uniform noise in
the range of [-`reset_noise_scale`, `reset_noise_scale`] is added to the positional values while a standard
normal noise with a mean of 0 and standard deviation of `reset_noise_scale` is added to the
initial velocity values of all zeros.
## Episode End
The episode truncates when the episode length is greater than 1000.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
env = gym.make('HalfCheetah-v2')
```
v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('HalfCheetah-v4', ctrl_cost_weight=0.1, ....)
```
| Parameter | Type | Default | Description |
| -------------------------------------------- | --------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `xml_file` | **str** | `"half_cheetah.xml"` | Path to a MuJoCo model |
| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
| `ctrl_cost_weight` | **float** | `0.1` | Weight for _ctrl_cost_ weight (see section on reward) |
| `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
forward_reward_weight,
ctrl_cost_weight,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
)
MujocoEnv.__init__(
self,
"half_cheetah.xml",
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
def step(self, action):
x_position_before = self.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
observation = self._get_obs()
reward = forward_reward - ctrl_cost
terminated = False
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
"reward_run": forward_reward,
"reward_ctrl": -ctrl_cost,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = self.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation

View File

@ -0,0 +1,67 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class HopperEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "hopper.xml", 4, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
posbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
s = self.state_vector()
terminated = not (
np.isfinite(s).all()
and (np.abs(s[2:]) < 100).all()
and (height > 0.7)
and (abs(ang) < 0.2)
)
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def _get_obs(self):
return np.concatenate(
[self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 2
self.viewer.cam.distance = self.model.stat.extent * 0.75
self.viewer.cam.lookat[2] = 1.15
self.viewer.cam.elevation = -20

View File

@ -0,0 +1,178 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 2,
"distance": 3.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class HopperEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(
self,
xml_file="hopper.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float("inf")),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_state_range,
healthy_z_range,
healthy_angle_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_state_range = healthy_state_range
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 4, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def is_healthy(self):
z, angle = self.sim.data.qpos[1:3]
state = self.state_vector()[2:]
min_state, max_state = self._healthy_state_range
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
is_healthy = all((healthy_state, healthy_z, healthy_angle))
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def step(self, action):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,298 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 2,
"distance": 3.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class HopperEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the work done by Erez, Tassa, and Todorov in
["Infinite Horizon Model Predictive Control for Nonlinear Periodic Tasks"](http://www.roboticsproceedings.org/rss07/p10.pdf). The environment aims to
increase the number of independent state and control variables as compared to
the classic control environments. The hopper is a two-dimensional
one-legged figure that consist of four main body parts - the torso at the
top, the thigh in the middle, the leg in the bottom, and a single foot on
which the entire body rests. The goal is to make hops that move in the
forward (right) direction by applying torques on the three hinges
connecting the four body parts.
## Action Space
The action space is a `Box(-1, 1, (3,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
| 0 | Torque applied on the thigh rotor | -1 | 1 | thigh_joint | hinge | torque (N m) |
| 1 | Torque applied on the leg rotor | -1 | 1 | leg_joint | hinge | torque (N m) |
| 2 | Torque applied on the foot rotor | -1 | 1 | foot_joint | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the
hopper, followed by the velocities of those individual parts
(their derivatives) with all the positions ordered before all the velocities.
By default, observations do not include the x-coordinate of the hopper. It may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be `Box(-Inf, Inf, (12,), float64)` where the first observation
represents the x-coordinate of the hopper.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
will be returned in `info` with key `"x_position"`.
However, by default, the observation is a `Box(-Inf, Inf, (11,), float64)` where the elements
correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | -------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
| 0 | z-coordinate of the torso (height of hopper) | -Inf | Inf | rootz | slide | position (m) |
| 1 | angle of the torso | -Inf | Inf | rooty | hinge | angle (rad) |
| 2 | angle of the thigh joint | -Inf | Inf | thigh_joint | hinge | angle (rad) |
| 3 | angle of the leg joint | -Inf | Inf | leg_joint | hinge | angle (rad) |
| 4 | angle of the foot joint | -Inf | Inf | foot_joint | hinge | angle (rad) |
| 5 | velocity of the x-coordinate of the torso | -Inf | Inf | rootx | slide | velocity (m/s) |
| 6 | velocity of the z-coordinate (height) of the torso | -Inf | Inf | rootz | slide | velocity (m/s) |
| 7 | angular velocity of the angle of the torso | -Inf | Inf | rooty | hinge | angular velocity (rad/s) |
| 8 | angular velocity of the thigh hinge | -Inf | Inf | thigh_joint | hinge | angular velocity (rad/s) |
| 9 | angular velocity of the leg hinge | -Inf | Inf | leg_joint | hinge | angular velocity (rad/s) |
| 10 | angular velocity of the foot hinge | -Inf | Inf | foot_joint | hinge | angular velocity (rad/s) |
| excluded | x-coordinate of the torso | -Inf | Inf | rootx | slide | position (m) |
## Rewards
The reward consists of three parts:
- *healthy_reward*: Every timestep that the hopper is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward`.
- *forward_reward*: A reward of hopping forward which is measured
as *`forward_reward_weight` * (x-coordinate before action - x-coordinate after action)/dt*. *dt* is
the time between actions and is dependent on the frame_skip parameter
(fixed to 4), where the frametime is 0.002 - making the
default *dt = 4 * 0.002 = 0.008*. This reward would be positive if the hopper
hops forward (positive x direction).
- *ctrl_cost*: A cost for penalising the hopper if it takes
actions that are too large. It is measured as *`ctrl_cost_weight` *
sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
control and has a default value of 0.001
The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
## Starting State
All observations start in state
(0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise
in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity.
## Episode End
The hopper is said to be unhealthy if any of the following happens:
1. An element of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) is no longer contained in the closed interval specified by the argument `healthy_state_range`
2. The height of the hopper (`observation[0]` if `exclude_current_positions_from_observation=True`, else `observation[1]`) is no longer contained in the closed interval specified by the argument `healthy_z_range` (usually meaning that it has fallen)
3. The angle (`observation[1]` if `exclude_current_positions_from_observation=True`, else `observation[2]`) is no longer contained in the closed interval specified by the argument `healthy_angle_range`
If `terminate_when_unhealthy=True` is passed during construction (which is the default),
the episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 1000 timesteps
2. Termination: The hopper is unhealthy
If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
env = gym.make('Hopper-v2')
```
v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('Hopper-v4', ctrl_cost_weight=0.1, ....)
```
| Parameter | Type | Default | Description |
| -------------------------------------------- | --------- | --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `xml_file` | **str** | `"hopper.xml"` | Path to a MuJoCo model |
| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
| `ctrl_cost_weight` | **float** | `0.001` | Weight for _ctrl_cost_ reward (see section on reward) |
| `healthy_reward` | **float** | `1` | Constant reward given if the ant is "healthy" after timestep |
| `terminate_when_unhealthy` | **bool** | `True` | If true, issue a done signal if the hopper is no longer healthy |
| `healthy_state_range` | **tuple** | `(-100, 100)` | The elements of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) must be in this range for the hopper to be considered healthy |
| `healthy_z_range` | **tuple** | `(0.7, float("inf"))` | The z-coordinate must be in this range for the hopper to be considered healthy |
| `healthy_angle_range` | **tuple** | `(-0.2, 0.2)` | The angle given by `observation[1]` (if `exclude_current_positions_from_observation=True`, else `observation[2]`) must be in this range for the hopper to be considered healthy |
| `reset_noise_scale` | **float** | `5e-3` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(
self,
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float("inf")),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_state_range,
healthy_z_range,
healthy_angle_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_state_range = healthy_state_range
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64
)
MujocoEnv.__init__(
self,
"hopper.xml",
4,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def is_healthy(self):
z, angle = self.data.qpos[1:3]
state = self.state_vector()[2:]
min_state, max_state = self._healthy_state_range
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
is_healthy = all((healthy_state, healthy_z, healthy_angle))
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = np.clip(self.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def step(self, action):
x_position_before = self.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation

View File

@ -0,0 +1,94 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, 1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, "humanoid.xml", 5, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def _get_obs(self):
data = self.sim.data
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
pos_before = mass_center(self.model, self.sim)
self.do_simulation(a, self.frame_skip)
pos_after = mass_center(self.model, self.sim)
alive_bonus = 5.0
data = self.sim.data
lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
qpos = self.sim.data.qpos
terminated = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,
terminated,
False,
dict(
reward_linvel=lin_vel_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_alive=alive_bonus,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 1
self.viewer.cam.distance = self.model.stat.extent * 1.0
self.viewer.cam.lookat[2] = 2.0
self.viewer.cam.elevation = -20

View File

@ -0,0 +1,200 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 2.0)),
"elevation": -20.0,
}
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, axis=1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()
class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(
self,
xml_file="humanoid.xml",
forward_reward_weight=1.25,
ctrl_cost_weight=0.1,
contact_cost_weight=5e-7,
contact_cost_range=(-np.inf, 10.0),
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
contact_cost_weight,
contact_cost_range,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
self._contact_cost_range = contact_cost_range
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(self.sim.data.ctrl))
return control_cost
@property
def contact_cost(self):
contact_forces = self.sim.data.cfrc_ext
contact_cost = self._contact_cost_weight * np.sum(np.square(contact_forces))
min_cost, max_cost = self._contact_cost_range
contact_cost = np.clip(contact_cost, min_cost, max_cost)
return contact_cost
@property
def is_healthy(self):
min_z, max_z = self._healthy_z_range
is_healthy = min_z < self.sim.data.qpos[2] < max_z
return is_healthy
@property
def terminated(self):
terminated = (not self.is_healthy) if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
com_inertia = self.sim.data.cinert.flat.copy()
com_velocity = self.sim.data.cvel.flat.copy()
actuator_forces = self.sim.data.qfrc_actuator.flat.copy()
external_contact_forces = self.sim.data.cfrc_ext.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
return np.concatenate(
(
position,
velocity,
com_inertia,
com_velocity,
actuator_forces,
external_contact_forces,
)
)
def step(self, action):
xy_position_before = mass_center(self.model, self.sim)
self.do_simulation(action, self.frame_skip)
xy_position_after = mass_center(self.model, self.sim)
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
ctrl_cost = self.control_cost(action)
contact_cost = self.contact_cost
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost + contact_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"reward_linvel": forward_reward,
"reward_quadctrl": -ctrl_cost,
"reward_alive": healthy_reward,
"reward_impact": -contact_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,422 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 2.0)),
"elevation": -20.0,
}
def mass_center(model, data):
mass = np.expand_dims(model.body_mass, axis=1)
xpos = data.xipos
return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()
class HumanoidEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the environment introduced by Tassa, Erez and Todorov
in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025).
The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a pair of
legs and arms. The legs each consist of three body parts, and the arms 2 body parts (representing the knees and
elbows respectively). The goal of the environment is to walk forward as fast as possible without falling over.
## Action Space
The action space is a `Box(-1, 1, (17,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|----------------------|---------------|----------------|---------------------------------------|-------|------|
| 0 | Torque applied on the hinge in the y-coordinate of the abdomen | -0.4 | 0.4 | abdomen_y | hinge | torque (N m) |
| 1 | Torque applied on the hinge in the z-coordinate of the abdomen | -0.4 | 0.4 | abdomen_z | hinge | torque (N m) |
| 2 | Torque applied on the hinge in the x-coordinate of the abdomen | -0.4 | 0.4 | abdomen_x | hinge | torque (N m) |
| 3 | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh) | hinge | torque (N m) |
| 4 | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh) | hinge | torque (N m) |
| 5 | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh) | hinge | torque (N m) |
| 6 | Torque applied on the rotor between the right hip/thigh and the right shin | -0.4 | 0.4 | right_knee | hinge | torque (N m) |
| 7 | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) | -0.4 | 0.4 | left_hip_x (left_thigh) | hinge | torque (N m) |
| 8 | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) | -0.4 | 0.4 | left_hip_z (left_thigh) | hinge | torque (N m) |
| 9 | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) | -0.4 | 0.4 | left_hip_y (left_thigh) | hinge | torque (N m) |
| 10 | Torque applied on the rotor between the left hip/thigh and the left shin | -0.4 | 0.4 | left_knee | hinge | torque (N m) |
| 11 | Torque applied on the rotor between the torso and right upper arm (coordinate -1) | -0.4 | 0.4 | right_shoulder1 | hinge | torque (N m) |
| 12 | Torque applied on the rotor between the torso and right upper arm (coordinate -2) | -0.4 | 0.4 | right_shoulder2 | hinge | torque (N m) |
| 13 | Torque applied on the rotor between the right upper arm and right lower arm | -0.4 | 0.4 | right_elbow | hinge | torque (N m) |
| 14 | Torque applied on the rotor between the torso and left upper arm (coordinate -1) | -0.4 | 0.4 | left_shoulder1 | hinge | torque (N m) |
| 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) |
| 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the Humanoid,
followed by the velocities of those individual parts (their derivatives) with all the
positions ordered before all the velocities.
By default, observations do not include the x- and y-coordinates of the torso. These may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be a `Box(-Inf, Inf, (378,), float64)` where the first two observations
represent the x- and y-coordinates of the torso.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
However, by default, the observation is a `Box(-Inf, Inf, (376,), float64)`. The elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- |
| 0 | z-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
| 1 | x-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 2 | y-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 3 | z-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 4 | w-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 5 | z-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angle (rad) |
| 6 | y-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angle (rad) |
| 7 | x-angle of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angle (rad) |
| 8 | x-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angle (rad) |
| 9 | z-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angle (rad) |
| 10 | y-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angle (rad) |
| 11 | angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angle (rad) |
| 12 | x-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angle (rad) |
| 13 | z-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angle (rad) |
| 14 | y-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angle (rad) |
| 15 | angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angle (rad) |
| 16 | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angle (rad) |
| 17 | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angle (rad) |
| 18 | angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angle (rad) |
| 19 | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angle (rad) |
| 20 | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angle (rad) |
| 21 | angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angle (rad) |
| 22 | x-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 23 | y-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 24 | z-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 25 | x-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 26 | y-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 27 | z-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 28 | z-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | anglular velocity (rad/s) |
| 29 | y-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | anglular velocity (rad/s) |
| 30 | x-coordinate of angular velocity of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | aanglular velocity (rad/s) |
| 31 | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | anglular velocity (rad/s) |
| 32 | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | anglular velocity (rad/s) |
| 33 | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | anglular velocity (rad/s) |
| 34 | angular velocity of the angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | anglular velocity (rad/s) |
| 35 | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | anglular velocity (rad/s) |
| 36 | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | anglular velocity (rad/s) |
| 37 | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | anglular velocity (rad/s) |
| 38 | angular velocity of the angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | anglular velocity (rad/s) |
| 39 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | anglular velocity (rad/s) |
| 40 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | anglular velocity (rad/s) |
| 41 | angular velocity of the angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | anglular velocity (rad/s) |
| 42 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | anglular velocity (rad/s) |
| 43 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | anglular velocity (rad/s) |
| 44 | angular velocity of the angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | anglular velocity (rad/s) |
| excluded | x-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
| excluded | y-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
Additionally, after all the positional and velocity based values in the table,
the observation contains (in order):
- *cinert:* Mass and inertia of a single rigid body relative to the center of mass
(this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*)
and hence adds to another 140 elements in the state space.
- *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence
adds another 84 elements in the state space
- *qfrc_actuator:* Constraint force generated as the actuator force. This has shape
`(23,)` *(nv * 1)* and hence adds another 23 elements to the state space.
- *cfrc_ext:* This is the center of mass based external force on the body. It has shape
14 * 6 (*nbody * 6*) and hence adds to another 84 elements in the state space.
where *nbody* stands for the number of bodies in the robot and *nv* stands for the
number of degrees of freedom (*= dim(qvel)*)
The body parts are:
| id (for `v2`,`v3`,`v4`) | body part |
| --- | ------------ |
| 0 | worldBody (note: all values are constant 0) |
| 1 | torso |
| 2 | lwaist |
| 3 | pelvis |
| 4 | right_thigh |
| 5 | right_sin |
| 6 | right_foot |
| 7 | left_thigh |
| 8 | left_sin |
| 9 | left_foot |
| 10 | right_upper_arm |
| 11 | right_lower_arm |
| 12 | left_upper_arm |
| 13 | left_lower_arm |
The joints are:
| id (for `v2`,`v3`,`v4`) | joint |
| --- | ------------ |
| 0 | root |
| 1 | root |
| 2 | root |
| 3 | root |
| 4 | root |
| 5 | root |
| 6 | abdomen_z |
| 7 | abdomen_y |
| 8 | abdomen_x |
| 9 | right_hip_x |
| 10 | right_hip_z |
| 11 | right_hip_y |
| 12 | right_knee |
| 13 | left_hip_x |
| 14 | left_hiz_z |
| 15 | left_hip_y |
| 16 | left_knee |
| 17 | right_shoulder1 |
| 18 | right_shoulder2 |
| 19 | right_elbow|
| 20 | left_shoulder1 |
| 21 | left_shoulder2 |
| 22 | left_elfbow |
The (x,y,z) coordinates are translational DOFs while the orientations are rotational
DOFs expressed as quaternions. One can read more about free joints on the
[Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
**Note:** Humanoid-v4 environment no longer has the following contact forces issue.
If using previous Humanoid versions from v4, there have been reported issues that using a Mujoco-Py version > 2.0
results in the contact forces always being 0. As such we recommend to use a Mujoco-Py
version < 2.0 when using the Humanoid environment if you would like to report results
with contact forces (if contact forces are not used in your experiments, you can use
version > 2.0).
## Rewards
The reward consists of three parts:
- *healthy_reward*: Every timestep that the humanoid is alive (see section Episode Termination for definition), it gets a reward of fixed value `healthy_reward`
- *forward_reward*: A reward of walking forward which is measured as *`forward_reward_weight` *
(average center of mass before action - average center of mass after action)/dt*.
*dt* is the time between actions and is dependent on the frame_skip parameter
(default is 5), where the frametime is 0.003 - making the default *dt = 5 * 0.003 = 0.015*.
This reward would be positive if the humanoid walks forward (in positive x-direction). The calculation
for the center of mass is defined in the `.py` file for the Humanoid.
- *ctrl_cost*: A negative reward for penalising the humanoid if it has too
large of a control force. If there are *nu* actuators/controls, then the control has
shape `nu x 1`. It is measured as *`ctrl_cost_weight` * sum(control<sup>2</sup>)*.
- *contact_cost*: A negative reward for penalising the humanoid if the external
contact force is too large. It is calculated by clipping
*`contact_cost_weight` * sum(external contact force<sup>2</sup>)* to the interval specified by `contact_cost_range`.
The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost* and `info` will also contain the individual reward terms
## Starting State
All observations start in state
(0.0, 0.0, 1.4, 1.0, 0.0 ... 0.0) with a uniform noise in the range
of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional and velocity values (values in the table)
for stochasticity. Note that the initial z coordinate is intentionally
selected to be high, thereby indicating a standing up humanoid. The initial
orientation is designed to make it face forward as well.
## Episode End
The humanoid is said to be unhealthy if the z-position of the torso is no longer contained in the
closed interval specified by the argument `healthy_z_range`.
If `terminate_when_unhealthy=True` is passed during construction (which is the default),
the episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 1000 timesteps
3. Termination: The humanoid is unhealthy
If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
env = gym.make('Humanoid-v4')
```
v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('Humanoid-v4', ctrl_cost_weight=0.1, ....)
```
| Parameter | Type | Default | Description |
| -------------------------------------------- | --------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `xml_file` | **str** | `"humanoid.xml"` | Path to a MuJoCo model |
| `forward_reward_weight` | **float** | `1.25` | Weight for _forward_reward_ term (see section on reward) |
| `ctrl_cost_weight` | **float** | `0.1` | Weight for _ctrl_cost_ term (see section on reward) |
| `contact_cost_weight` | **float** | `5e-7` | Weight for _contact_cost_ term (see section on reward) |
| `healthy_reward` | **float** | `5.0` | Constant reward given if the humanoid is "healthy" after timestep |
| `terminate_when_unhealthy` | **bool** | `True` | If true, issue a done signal if the z-coordinate of the torso is no longer in the `healthy_z_range` |
| `healthy_z_range` | **tuple** | `(1.0, 2.0)` | The humanoid is considered healthy if the z-coordinate of the torso is in this range |
| `reset_noise_scale` | **float** | `1e-2` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(
self,
forward_reward_weight=1.25,
ctrl_cost_weight=0.1,
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64
)
MujocoEnv.__init__(
self,
"humanoid.xml",
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(self.data.ctrl))
return control_cost
@property
def is_healthy(self):
min_z, max_z = self._healthy_z_range
is_healthy = min_z < self.data.qpos[2] < max_z
return is_healthy
@property
def terminated(self):
terminated = (not self.is_healthy) if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = self.data.qvel.flat.copy()
com_inertia = self.data.cinert.flat.copy()
com_velocity = self.data.cvel.flat.copy()
actuator_forces = self.data.qfrc_actuator.flat.copy()
external_contact_forces = self.data.cfrc_ext.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
return np.concatenate(
(
position,
velocity,
com_inertia,
com_velocity,
actuator_forces,
external_contact_forces,
)
)
def step(self, action):
xy_position_before = mass_center(self.model, self.data)
self.do_simulation(action, self.frame_skip)
xy_position_after = mass_center(self.model, self.data)
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
observation = self._get_obs()
reward = rewards - ctrl_cost
terminated = self.terminated
info = {
"reward_linvel": forward_reward,
"reward_quadctrl": -ctrl_cost,
"reward_alive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation

View File

@ -0,0 +1,87 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self,
"humanoidstandup.xml",
5,
observation_space=observation_space,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def _get_obs(self):
data = self.sim.data
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
self.do_simulation(a, self.frame_skip)
pos_after = self.sim.data.qpos[2]
data = self.sim.data
uph_cost = (pos_after - 0) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,
False,
False,
dict(
reward_linup=uph_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 1
self.viewer.cam.distance = self.model.stat.extent * 1.0
self.viewer.cam.lookat[2] = 0.8925
self.viewer.cam.elevation = -20

View File

@ -0,0 +1,319 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 0.8925)),
"elevation": -20.0,
}
class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the environment introduced by Tassa, Erez and Todorov
in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025).
The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a
pair of legs and arms. The legs each consist of two links, and so the arms (representing the
knees and elbows respectively). The environment starts with the humanoid laying on the ground,
and then the goal of the environment is to make the humanoid standup and then keep it standing
by applying torques on the various hinges.
## Action Space
The agent take a 17-element vector for actions.
The action space is a continuous `(action, ...)` all in `[-1, 1]`, where `action`
represents the numerical torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ---------------------------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
| 0 | Torque applied on the hinge in the y-coordinate of the abdomen | -0.4 | 0.4 | abdomen_y | hinge | torque (N m) |
| 1 | Torque applied on the hinge in the z-coordinate of the abdomen | -0.4 | 0.4 | abdomen_z | hinge | torque (N m) |
| 2 | Torque applied on the hinge in the x-coordinate of the abdomen | -0.4 | 0.4 | abdomen_x | hinge | torque (N m) |
| 3 | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh) | hinge | torque (N m) |
| 4 | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh) | hinge | torque (N m) |
| 5 | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh) | hinge | torque (N m) |
| 6 | Torque applied on the rotor between the right hip/thigh and the right shin | -0.4 | 0.4 | right_knee | hinge | torque (N m) |
| 7 | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) | -0.4 | 0.4 | left_hip_x (left_thigh) | hinge | torque (N m) |
| 8 | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) | -0.4 | 0.4 | left_hip_z (left_thigh) | hinge | torque (N m) |
| 9 | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) | -0.4 | 0.4 | left_hip_y (left_thigh) | hinge | torque (N m) |
| 10 | Torque applied on the rotor between the left hip/thigh and the left shin | -0.4 | 0.4 | left_knee | hinge | torque (N m) |
| 11 | Torque applied on the rotor between the torso and right upper arm (coordinate -1) | -0.4 | 0.4 | right_shoulder1 | hinge | torque (N m) |
| 12 | Torque applied on the rotor between the torso and right upper arm (coordinate -2) | -0.4 | 0.4 | right_shoulder2 | hinge | torque (N m) |
| 13 | Torque applied on the rotor between the right upper arm and right lower arm | -0.4 | 0.4 | right_elbow | hinge | torque (N m) |
| 14 | Torque applied on the rotor between the torso and left upper arm (coordinate -1) | -0.4 | 0.4 | left_shoulder1 | hinge | torque (N m) |
| 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) |
| 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the Humanoid,
followed by the velocities of those individual parts (their derivatives) with all the
positions ordered before all the velocities.
By default, observations do not include the x- and y-coordinates of the torso. These may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be a `Box(-Inf, Inf, (378,), float64)` where the first two observations
represent the x- and y-coordinates of the torso.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
However, by default, the observation is a `Box(-Inf, Inf, (376,), float64)`. The elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- |
| 0 | z-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
| 1 | x-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 2 | y-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 3 | z-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 4 | w-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 5 | z-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angle (rad) |
| 6 | y-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angle (rad) |
| 7 | x-angle of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angle (rad) |
| 8 | x-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angle (rad) |
| 9 | z-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angle (rad) |
| 10 | y-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angle (rad) |
| 11 | angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angle (rad) |
| 12 | x-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angle (rad) |
| 13 | z-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angle (rad) |
| 14 | y-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angle (rad) |
| 15 | angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angle (rad) |
| 16 | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angle (rad) |
| 17 | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angle (rad) |
| 18 | angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angle (rad) |
| 19 | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angle (rad) |
| 20 | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angle (rad) |
| 21 | angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angle (rad) |
| 22 | x-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 23 | y-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 24 | z-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 25 | x-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 26 | y-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 27 | z-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 28 | z-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | anglular velocity (rad/s) |
| 29 | y-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | anglular velocity (rad/s) |
| 30 | x-coordinate of angular velocity of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | aanglular velocity (rad/s) |
| 31 | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | anglular velocity (rad/s) |
| 32 | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | anglular velocity (rad/s) |
| 33 | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | anglular velocity (rad/s) |
| 34 | angular velocity of the angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | anglular velocity (rad/s) |
| 35 | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | anglular velocity (rad/s) |
| 36 | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | anglular velocity (rad/s) |
| 37 | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | anglular velocity (rad/s) |
| 38 | angular velocity of the angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | anglular velocity (rad/s) |
| 39 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | anglular velocity (rad/s) |
| 40 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | anglular velocity (rad/s) |
| 41 | angular velocity of the angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | anglular velocity (rad/s) |
| 42 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | anglular velocity (rad/s) |
| 43 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | anglular velocity (rad/s) |
| 44 | angular velocity of the angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | anglular velocity (rad/s) |
| excluded | x-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
| excluded | y-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
Additionally, after all the positional and velocity based values in the table,
the observation contains (in order):
- *cinert:* Mass and inertia of a single rigid body relative to the center of mass
(this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*)
and hence adds to another 140 elements in the state space.
- *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence
adds another 84 elements in the state space
- *qfrc_actuator:* Constraint force generated as the actuator force. This has shape
`(23,)` *(nv * 1)* and hence adds another 23 elements to the state space.
- *cfrc_ext:* This is the center of mass based external force on the body. It has shape
14 * 6 (*nbody * 6*) and hence adds to another 84 elements in the state space.
where *nbody* stands for the number of bodies in the robot and *nv* stands for the
number of degrees of freedom (*= dim(qvel)*)
The body parts are:
| id (for `v2`,`v3`,`v4`) | body part |
| --- | ------------ |
| 0 | worldBody (note: all values are constant 0) |
| 1 | torso |
| 2 | lwaist |
| 3 | pelvis |
| 4 | right_thigh |
| 5 | right_sin |
| 6 | right_foot |
| 7 | left_thigh |
| 8 | left_sin |
| 9 | left_foot |
| 10 | right_upper_arm |
| 11 | right_lower_arm |
| 12 | left_upper_arm |
| 13 | left_lower_arm |
The joints are:
| id (for `v2`,`v3`,`v4`) | joint |
| --- | ------------ |
| 0 | root |
| 1 | root |
| 2 | root |
| 3 | root |
| 4 | root |
| 5 | root |
| 6 | abdomen_z |
| 7 | abdomen_y |
| 8 | abdomen_x |
| 9 | right_hip_x |
| 10 | right_hip_z |
| 11 | right_hip_y |
| 12 | right_knee |
| 13 | left_hip_x |
| 14 | left_hiz_z |
| 15 | left_hip_y |
| 16 | left_knee |
| 17 | right_shoulder1 |
| 18 | right_shoulder2 |
| 19 | right_elbow|
| 20 | left_shoulder1 |
| 21 | left_shoulder2 |
| 22 | left_elfbow |
The (x,y,z) coordinates are translational DOFs while the orientations are rotational
DOFs expressed as quaternions. One can read more about free joints on the
[Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
**Note:** HumanoidStandup-v4 environment no longer has the following contact forces issue.
If using previous HumanoidStandup versions from v4, there have been reported issues that using a Mujoco-Py version > 2.0 results
in the contact forces always being 0. As such we recommend to use a Mujoco-Py version < 2.0
when using the Humanoid environment if you would like to report results with contact forces
(if contact forces are not used in your experiments, you can use version > 2.0).
## Rewards
The reward consists of three parts:
- *uph_cost*: A reward for moving upward (in an attempt to stand up). This is not a relative
reward which measures how much upward it has moved from the last timestep, but it is an
absolute reward which measures how much upward the Humanoid has moved overall. It is
measured as *(z coordinate after action - 0)/(atomic timestep)*, where *z coordinate after
action* is index 0 in the state/index 2 in the table, and *atomic timestep* is the time for
one frame of movement even though the simulation has a framerate of 5 (done in order to inflate
rewards a little for faster learning).
- *quad_ctrl_cost*: A negative reward for penalising the humanoid if it has too large of
a control force. If there are *nu* actuators/controls, then the control has shape `nu x 1`.
It is measured as *0.1 **x** sum(control<sup>2</sup>)*.
- *quad_impact_cost*: A negative reward for penalising the humanoid if the external
contact force is too large. It is calculated as *min(0.5 * 0.000001 * sum(external
contact force<sup>2</sup>), 10)*.
The total reward returned is ***reward*** *=* *uph_cost + 1 - quad_ctrl_cost - quad_impact_cost*
## Starting State
All observations start in state
(0.0, 0.0, 0.105, 1.0, 0.0 ... 0.0) with a uniform noise in the range of
[-0.01, 0.01] added to the positional and velocity values (values in the table)
for stochasticity. Note that the initial z coordinate is intentionally selected
to be low, thereby indicating a laying down humanoid. The initial orientation is
designed to make it face forward as well.
## Episode End
The episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 1000 timesteps
2. Termination: Any of the state space values is no longer finite
## Arguments
No additional arguments are currently supported.
```python
import gymnasium as gym
env = gym.make('HumanoidStandup-v4')
```
There is no v3 for HumanoidStandup, unlike the robot environments where a v3 and
beyond take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
```python
import gymnasium as gym
env = gym.make('HumanoidStandup-v2')
```
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
MujocoEnv.__init__(
self,
"humanoidstandup.xml",
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def _get_obs(self):
data = self.data
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
self.do_simulation(a, self.frame_skip)
pos_after = self.data.qpos[2]
data = self.data
uph_cost = (pos_after - 0) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,
False,
False,
dict(
reward_linup=uph_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()

View File

@ -0,0 +1,69 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MuJocoPyEnv.__init__(
self,
"inverted_double_pendulum.xml",
5,
observation_space=observation_space,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, action):
self.do_simulation(action, self.frame_skip)
ob = self._get_obs()
x, _, y = self.sim.data.site_xpos[0]
dist_penalty = 0.01 * x**2 + (y - 2) ** 2
v1, v2 = self.sim.data.qvel[1:3]
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
terminated = bool(y <= 1)
if self.render_mode == "human":
self.render()
return ob, r, terminated, False, {}
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos[:1], # cart x pos
np.sin(self.sim.data.qpos[1:]), # link angles
np.cos(self.sim.data.qpos[1:]),
np.clip(self.sim.data.qvel, -10, 10),
np.clip(self.sim.data.qfrc_constraint, -10, 10),
]
).ravel()
def reset_model(self):
self.set_state(
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1,
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
v = self.viewer
v.cam.trackbodyid = 0
v.cam.distance = self.model.stat.extent * 0.5
v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]

View File

@ -0,0 +1,179 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 0,
"distance": 4.1225,
"lookat": np.array((0.0, 0.0, 0.12250000000000005)),
}
class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment originates from control theory and builds on the cartpole
environment based on the work done by Barto, Sutton, and Anderson in
["Neuronlike adaptive elements that can solve difficult learning control problems"](https://ieeexplore.ieee.org/document/6313077),
powered by the Mujoco physics simulator - allowing for more complex experiments
(such as varying the effects of gravity or constraints). This environment involves a cart that can
moved linearly, with a pole fixed on it and a second pole fixed on the other end of the first one
(leaving the second pole as the only one with one free end). The cart can be pushed left or right,
and the goal is to balance the second pole on top of the first pole, which is in turn on top of the
cart, by applying continuous forces on the cart.
## Action Space
The agent take a 1-element vector for actions.
The action space is a continuous `(action)` in `[-1, 1]`, where `action` represents the
numerical force applied to the cart (with magnitude representing the amount of force and
sign representing the direction)
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------|
| 0 | Force applied on the cart | -1 | 1 | slider | slide | Force (N) |
## Observation Space
The state space consists of positional values of different body parts of the pendulum system,
followed by the velocities of those individual parts (their derivatives) with all the
positions ordered before all the velocities.
The observation is a `ndarray` with shape `(11,)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ----------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
| 0 | position of the cart along the linear surface | -Inf | Inf | slider | slide | position (m) |
| 1 | sine of the angle between the cart and the first pole | -Inf | Inf | sin(hinge) | hinge | unitless |
| 2 | sine of the angle between the two poles | -Inf | Inf | sin(hinge2) | hinge | unitless |
| 3 | cosine of the angle between the cart and the first pole | -Inf | Inf | cos(hinge) | hinge | unitless |
| 4 | cosine of the angle between the two poles | -Inf | Inf | cos(hinge2) | hinge | unitless |
| 5 | velocity of the cart | -Inf | Inf | slider | slide | velocity (m/s) |
| 6 | angular velocity of the angle between the cart and the first pole | -Inf | Inf | hinge | hinge | angular velocity (rad/s) |
| 7 | angular velocity of the angle between the two poles | -Inf | Inf | hinge2 | hinge | angular velocity (rad/s) |
| 8 | constraint force - 1 | -Inf | Inf | | | Force (N) |
| 9 | constraint force - 2 | -Inf | Inf | | | Force (N) |
| 10 | constraint force - 3 | -Inf | Inf | | | Force (N) |
There is physical contact between the robots and their environment - and Mujoco
attempts at getting realistic physics simulations for the possible physical contact
dynamics by aiming for physical accuracy and computational efficiency.
There is one constraint force for contacts for each degree of freedom (3).
The approach and handling of constraints by Mujoco is unique to the simulator
and is based on their research. Once can find more information in their
[*documentation*](https://mujoco.readthedocs.io/en/latest/computation.html)
or in their paper
["Analytically-invertible dynamics with contacts and constraints: Theory and implementation in MuJoCo"](https://homes.cs.washington.edu/~todorov/papers/TodorovICRA14.pdf).
## Rewards
The reward consists of two parts:
- *alive_bonus*: The goal is to make the second inverted pendulum stand upright
(within a certain angle limit) as long as possible - as such a reward of +10 is awarded
for each timestep that the second pole is upright.
- *distance_penalty*: This reward is a measure of how far the *tip* of the second pendulum
(the only free end) moves, and it is calculated as
*0.01 * x<sup>2</sup> + (y - 2)<sup>2</sup>*, where *x* is the x-coordinate of the tip
and *y* is the y-coordinate of the tip of the second pole.
- *velocity_penalty*: A negative reward for penalising the agent if it moves too
fast *0.001 * v<sub>1</sub><sup>2</sup> + 0.005 * v<sub>2</sub> <sup>2</sup>*
The total reward returned is ***reward*** *=* *alive_bonus - distance_penalty - velocity_penalty*
## Starting State
All observations start in state
(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise in the range
of [-0.1, 0.1] added to the positional values (cart position and pole angles) and standard
normal force with a standard deviation of 0.1 added to the velocity values for stochasticity.
## Episode End
The episode ends when any of the following happens:
1.Truncation: The episode duration reaches 1000 timesteps.
2.Termination: Any of the state space values is no longer finite.
3.Termination: The y_coordinate of the tip of the second pole *is less than or equal* to 1. The maximum standing height of the system is 1.196 m when all the parts are perpendicularly vertical on top of each other).
## Arguments
No additional arguments are currently supported.
```python
import gymnasium as gym
env = gym.make('InvertedDoublePendulum-v4')
```
There is no v3 for InvertedPendulum, unlike the robot environments where a v3 and
beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('InvertedDoublePendulum-v2')
```
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum)
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MujocoEnv.__init__(
self,
"inverted_double_pendulum.xml",
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, action):
self.do_simulation(action, self.frame_skip)
ob = self._get_obs()
x, _, y = self.data.site_xpos[0]
dist_penalty = 0.01 * x**2 + (y - 2) ** 2
v1, v2 = self.data.qvel[1:3]
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
terminated = bool(y <= 1)
if self.render_mode == "human":
self.render()
return ob, r, terminated, False, {}
def _get_obs(self):
return np.concatenate(
[
self.data.qpos[:1], # cart x pos
np.sin(self.data.qpos[1:]), # link angles
np.cos(self.data.qpos[1:]),
np.clip(self.data.qvel, -10, 10),
np.clip(self.data.qfrc_constraint, -10, 10),
]
).ravel()
def reset_model(self):
self.set_state(
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1,
)
return self._get_obs()

View File

@ -0,0 +1,56 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
MuJocoPyEnv.__init__(
self,
"inverted_pendulum.xml",
2,
observation_space=observation_space,
**kwargs,
)
def step(self, a):
reward = 1.0
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.01, high=0.01
)
qvel = self.init_qvel + self.np_random.uniform(
size=self.model.nv, low=-0.01, high=0.01
)
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 0
self.viewer.cam.distance = self.model.stat.extent

View File

@ -0,0 +1,137 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 0,
"distance": 2.04,
}
class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is the cartpole environment based on the work done by
Barto, Sutton, and Anderson in ["Neuronlike adaptive elements that can
solve difficult learning control problems"](https://ieeexplore.ieee.org/document/6313077),
just like in the classic environments but now powered by the Mujoco physics simulator -
allowing for more complex experiments (such as varying the effects of gravity).
This environment involves a cart that can moved linearly, with a pole fixed on it
at one end and having another end free. The cart can be pushed left or right, and the
goal is to balance the pole on the top of the cart by applying forces on the cart.
## Action Space
The agent take a 1-element vector for actions.
The action space is a continuous `(action)` in `[-3, 3]`, where `action` represents
the numerical force applied to the cart (with magnitude representing the amount of
force and sign representing the direction)
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------|
| 0 | Force applied on the cart | -3 | 3 | slider | slide | Force (N) |
## Observation Space
The state space consists of positional values of different body parts of
the pendulum system, followed by the velocities of those individual parts (their derivatives)
with all the positions ordered before all the velocities.
The observation is a `ndarray` with shape `(4,)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | --------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------- |
| 0 | position of the cart along the linear surface | -Inf | Inf | slider | slide | position (m) |
| 1 | vertical angle of the pole on the cart | -Inf | Inf | hinge | hinge | angle (rad) |
| 2 | linear velocity of the cart | -Inf | Inf | slider | slide | velocity (m/s) |
| 3 | angular velocity of the pole on the cart | -Inf | Inf | hinge | hinge | anglular velocity (rad/s) |
## Rewards
The goal is to make the inverted pendulum stand upright (within a certain angle limit)
as long as possible - as such a reward of +1 is awarded for each timestep that
the pole is upright.
## Starting State
All observations start in state
(0.0, 0.0, 0.0, 0.0) with a uniform noise in the range
of [-0.01, 0.01] added to the values for stochasticity.
## Episode End
The episode ends when any of the following happens:
1. Truncation: The episode duration reaches 1000 timesteps.
2. Termination: Any of the state space values is no longer finite.
3. Termination: The absolute value of the vertical angle between the pole and the cart is greater than 0.2 radian.
## Arguments
No additional arguments are currently supported.
```python
import gymnasium as gym
env = gym.make('InvertedPendulum-v4')
```
There is no v3 for InvertedPendulum, unlike the robot environments where a
v3 and beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('InvertedPendulum-v2')
```
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum)
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
MujocoEnv.__init__(
self,
"inverted_pendulum.xml",
2,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
def step(self, a):
reward = 1.0
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.01, high=0.01
)
qvel = self.init_qvel + self.np_random.uniform(
size=self.model.nv, low=-0.01, high=0.01
)
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([self.data.qpos, self.data.qvel]).ravel()

View File

@ -0,0 +1,418 @@
from os import path
from typing import Any, Dict, Optional, Tuple, Union
import numpy as np
from numpy.typing import NDArray
import gymnasium as gym
from gymnasium import error, logger, spaces
from gymnasium.spaces import Space
try:
import mujoco_py
except ImportError as e:
MUJOCO_PY_IMPORT_ERROR = e
else:
MUJOCO_PY_IMPORT_ERROR = None
try:
import mujoco
except ImportError as e:
MUJOCO_IMPORT_ERROR = e
else:
MUJOCO_IMPORT_ERROR = None
DEFAULT_SIZE = 480
class BaseMujocoEnv(gym.Env[np.float64, np.float32]):
"""Superclass for all MuJoCo environments."""
def __init__(
self,
model_path,
frame_skip,
observation_space: Space,
render_mode: Optional[str] = None,
width: int = DEFAULT_SIZE,
height: int = DEFAULT_SIZE,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
):
"""Base abstract class for mujoco based environments.
Args:
model_path: Path to the MuJoCo Model.
frame_skip: Number of MuJoCo simulation steps per gym `step()`.
observation_space: The observation space of the environment.
render_mode: The `render_mode` used.
width: The width of the render window.
height: The height of the render window.
camera_id: The camera ID used.
camera_name: The name of the camera used (can not be used in conjunction with `camera_id`).
Raises:
OSError: when the `model_path` does not exist.
error.DependencyNotInstalled: When `mujoco` is not installed.
"""
if model_path.startswith(".") or model_path.startswith("/"):
self.fullpath = model_path
elif model_path.startswith("~"):
self.fullpath = path.expanduser(model_path)
else:
self.fullpath = path.join(path.dirname(__file__), "assets", model_path)
if not path.exists(self.fullpath):
raise OSError(f"File {self.fullpath} does not exist")
self.width = width
self.height = height
# may use width and height
self.model, self.data = self._initialize_simulation()
self.init_qpos = self.data.qpos.ravel().copy()
self.init_qvel = self.data.qvel.ravel().copy()
self.frame_skip = frame_skip
assert self.metadata["render_modes"] == [
"human",
"rgb_array",
"depth_array",
], self.metadata["render_modes"]
if "render_fps" in self.metadata:
assert (
int(np.round(1.0 / self.dt)) == self.metadata["render_fps"]
), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}'
self.observation_space = observation_space
self._set_action_space()
self.render_mode = render_mode
self.camera_name = camera_name
self.camera_id = camera_id
def _set_action_space(self):
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
low, high = bounds.T
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
return self.action_space
# methods to override:
# ----------------------------
def reset_model(self) -> NDArray[np.float64]:
"""
Reset the robot degrees of freedom (qpos and qvel).
Implement this in each subclass.
"""
raise NotImplementedError
def _initialize_simulation(self) -> Tuple[Any, Any]:
"""
Initialize MuJoCo simulation data structures mjModel and mjData.
"""
raise NotImplementedError
def _reset_simulation(self) -> None:
"""
Reset MuJoCo simulation data structures, mjModel and mjData.
"""
raise NotImplementedError
def _step_mujoco_simulation(self, ctrl, n_frames) -> None:
"""
Step over the MuJoCo simulation.
"""
raise NotImplementedError
def render(self) -> Union[NDArray[np.float64], None]:
"""
Render a frame from the MuJoCo simulation as specified by the render_mode.
"""
raise NotImplementedError
# -----------------------------
def _get_reset_info(self) -> Dict[str, float]:
"""Function that generates the `info` that is returned during a `reset()`."""
return {}
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
self._reset_simulation()
ob = self.reset_model()
info = self._get_reset_info()
if self.render_mode == "human":
self.render()
return ob, info
def set_state(self, qpos, qvel) -> None:
"""
Set the joints position qpos and velocity qvel of the model. Override this method depending on the MuJoCo bindings used.
"""
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
@property
def dt(self) -> float:
return self.model.opt.timestep * self.frame_skip
def do_simulation(self, ctrl, n_frames) -> None:
"""
Step the simulation n number of frames and applying a control action.
"""
# Check control input is contained in the action space
if np.array(ctrl).shape != (self.model.nu,):
raise ValueError(
f"Action dimension mismatch. Expected {(self.model.nu,)}, found {np.array(ctrl).shape}"
)
self._step_mujoco_simulation(ctrl, n_frames)
def close(self):
"""Close all processes like rendering contexts"""
raise NotImplementedError
def get_body_com(self, body_name) -> NDArray[np.float64]:
"""Return the cartesian position of a body frame"""
raise NotImplementedError
def state_vector(self) -> NDArray[np.float64]:
"""Return the position and velocity joint states of the model"""
return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])
class MuJocoPyEnv(BaseMujocoEnv):
def __init__(
self,
model_path: str,
frame_skip: int,
observation_space: Space,
render_mode: Optional[str] = None,
width: int = DEFAULT_SIZE,
height: int = DEFAULT_SIZE,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
):
if MUJOCO_PY_IMPORT_ERROR is not None:
raise error.DependencyNotInstalled(
f"{MUJOCO_PY_IMPORT_ERROR}. "
"(HINT: you need to install mujoco-py, and also perform the setup instructions "
"here: https://github.com/openai/mujoco-py.)"
)
logger.deprecation(
"This version of the mujoco environments depends "
"on the mujoco-py bindings, which are no longer maintained "
"and may stop working. Please upgrade to the v4 versions of "
"the environments (which depend on the mujoco python bindings instead), unless "
"you are trying to precisely replicate previous works)."
)
self.viewer = None
self._viewers = {}
super().__init__(
model_path,
frame_skip,
observation_space,
render_mode,
width,
height,
camera_id,
camera_name,
)
def _initialize_simulation(self):
model = mujoco_py.load_model_from_path(self.fullpath)
self.sim = mujoco_py.MjSim(model)
data = self.sim.data
return model, data
def _reset_simulation(self):
self.sim.reset()
def set_state(self, qpos, qvel):
super().set_state(qpos, qvel)
state = self.sim.get_state()
state = mujoco_py.MjSimState(state.time, qpos, qvel, state.act, state.udd_state)
self.sim.set_state(state)
self.sim.forward()
def get_body_com(self, body_name):
return self.data.get_body_xpos(body_name)
def _step_mujoco_simulation(self, ctrl, n_frames):
self.sim.data.ctrl[:] = ctrl
for _ in range(n_frames):
self.sim.step()
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
width, height = self.width, self.height
camera_name, camera_id = self.camera_name, self.camera_id
if self.render_mode in {"rgb_array", "depth_array"}:
if camera_id is not None and camera_name is not None:
raise ValueError(
"Both `camera_id` and `camera_name` cannot be"
" specified at the same time."
)
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
camera_name = "track"
if camera_id is None and camera_name in self.model._camera_name2id:
if camera_name in self.model._camera_name2id:
camera_id = self.model.camera_name2id(camera_name)
self._get_viewer(self.render_mode).render(
width, height, camera_id=camera_id
)
if self.render_mode == "rgb_array":
data = self._get_viewer(self.render_mode).read_pixels(
width, height, depth=False
)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif self.render_mode == "depth_array":
self._get_viewer(self.render_mode).render(width, height)
# Extract depth part of the read_pixels() tuple
data = self._get_viewer(self.render_mode).read_pixels(
width, height, depth=True
)[1]
# original image is upside-down, so flip it
return data[::-1, :]
elif self.render_mode == "human":
self._get_viewer(self.render_mode).render()
def _get_viewer(
self, mode
) -> Union["mujoco_py.MjViewer", "mujoco_py.MjRenderContextOffscreen"]:
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == "human":
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode in {"rgb_array", "depth_array"}:
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
else:
raise AttributeError(
f"Unknown mode: {mode}, expected modes: {self.metadata['render_modes']}"
)
self.viewer_setup()
self._viewers[mode] = self.viewer
return self.viewer
def close(self):
if self.viewer is not None:
self.viewer = None
self._viewers = {}
def viewer_setup(self):
"""
This method is called when the viewer is initialized.
Optionally implement this method, if you need to tinker with camera position and so forth.
"""
raise NotImplementedError
class MujocoEnv(BaseMujocoEnv):
"""Superclass for MuJoCo environments."""
def __init__(
self,
model_path,
frame_skip,
observation_space: Space,
render_mode: Optional[str] = None,
width: int = DEFAULT_SIZE,
height: int = DEFAULT_SIZE,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
default_camera_config: Optional[dict] = None,
):
if MUJOCO_IMPORT_ERROR is not None:
raise error.DependencyNotInstalled(
f"{MUJOCO_IMPORT_ERROR}. "
"(HINT: you need to install mujoco, run `pip install gymnasium[mujoco]`.)"
)
super().__init__(
model_path,
frame_skip,
observation_space,
render_mode,
width,
height,
camera_id,
camera_name,
)
from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer
self.mujoco_renderer = MujocoRenderer(
self.model, self.data, default_camera_config
)
def _initialize_simulation(
self,
):
model = mujoco.MjModel.from_xml_path(self.fullpath)
# MjrContext will copy model.vis.global_.off* to con.off*
model.vis.global_.offwidth = self.width
model.vis.global_.offheight = self.height
data = mujoco.MjData(model)
return model, data
def _reset_simulation(self):
mujoco.mj_resetData(self.model, self.data)
def set_state(self, qpos, qvel):
super().set_state(qpos, qvel)
self.data.qpos[:] = np.copy(qpos)
self.data.qvel[:] = np.copy(qvel)
if self.model.na == 0:
self.data.act[:] = None
mujoco.mj_forward(self.model, self.data)
def _step_mujoco_simulation(self, ctrl, n_frames):
self.data.ctrl[:] = ctrl
mujoco.mj_step(self.model, self.data, nstep=n_frames)
# As of MuJoCo 2.0, force-related quantities like cacc are not computed
# unless there's a force sensor in the model.
# See https://github.com/openai/gym/issues/1541
mujoco.mj_rnePostConstraint(self.model, self.data)
def render(self):
return self.mujoco_renderer.render(
self.render_mode, self.camera_id, self.camera_name
)
def close(self):
if self.mujoco_renderer is not None:
self.mujoco_renderer.close()
def get_body_com(self, body_name):
return self.data.body(body_name).xpos

View File

@ -0,0 +1,714 @@
import collections
import os
import time
from typing import Optional
import glfw
import imageio
import mujoco
import numpy as np
def _import_egl(width, height):
from mujoco.egl import GLContext
return GLContext(width, height)
def _import_glfw(width, height):
from mujoco.glfw import GLContext
return GLContext(width, height)
def _import_osmesa(width, height):
from mujoco.osmesa import GLContext
return GLContext(width, height)
_ALL_RENDERERS = collections.OrderedDict(
[
("glfw", _import_glfw),
("egl", _import_egl),
("osmesa", _import_osmesa),
]
)
class BaseRender:
def __init__(
self, model: "mujoco.MjModel", data: "mujoco.MjData", width: int, height: int
):
"""Render context superclass for offscreen and window rendering."""
self.model = model
self.data = data
self._markers = []
self._overlays = {}
self.viewport = mujoco.MjrRect(0, 0, width, height)
# This goes to specific visualizer
self.scn = mujoco.MjvScene(self.model, 1000)
self.cam = mujoco.MjvCamera()
self.vopt = mujoco.MjvOption()
self.pert = mujoco.MjvPerturb()
self.make_context_current()
# Keep in Mujoco Context
self.con = mujoco.MjrContext(self.model, mujoco.mjtFontScale.mjFONTSCALE_150)
self._set_mujoco_buffer()
def _set_mujoco_buffer(self):
raise NotImplementedError
def make_context_current(self):
raise NotImplementedError
def add_overlay(self, gridpos: int, text1: str, text2: str):
"""Overlays text on the scene."""
if gridpos not in self._overlays:
self._overlays[gridpos] = ["", ""]
self._overlays[gridpos][0] += text1 + "\n"
self._overlays[gridpos][1] += text2 + "\n"
def add_marker(self, **marker_params):
self._markers.append(marker_params)
def _add_marker_to_scene(self, marker: dict):
if self.scn.ngeom >= self.scn.maxgeom:
raise RuntimeError("Ran out of geoms. maxgeom: %d" % self.scn.maxgeom)
g = self.scn.geoms[self.scn.ngeom]
# default values.
g.dataid = -1
g.objtype = mujoco.mjtObj.mjOBJ_UNKNOWN
g.objid = -1
g.category = mujoco.mjtCatBit.mjCAT_DECOR
g.texid = -1
g.texuniform = 0
g.texrepeat[0] = 1
g.texrepeat[1] = 1
g.emission = 0
g.specular = 0.5
g.shininess = 0.5
g.reflectance = 0
g.type = mujoco.mjtGeom.mjGEOM_BOX
g.size[:] = np.ones(3) * 0.1
g.mat[:] = np.eye(3)
g.rgba[:] = np.ones(4)
for key, value in marker.items():
if isinstance(value, (int, float, mujoco._enums.mjtGeom)):
setattr(g, key, value)
elif isinstance(value, (tuple, list, np.ndarray)):
attr = getattr(g, key)
attr[:] = np.asarray(value).reshape(attr.shape)
elif isinstance(value, str):
assert key == "label", "Only label is a string in mjtGeom."
if value is None:
g.label[0] = 0
else:
g.label = value
elif hasattr(g, key):
raise ValueError(
"mjtGeom has attr {} but type {} is invalid".format(
key, type(value)
)
)
else:
raise ValueError("mjtGeom doesn't have field %s" % key)
self.scn.ngeom += 1
def close(self):
"""Override close in your rendering subclass to perform any necessary cleanup
after env.close() is called.
"""
raise NotImplementedError
class OffScreenViewer(BaseRender):
"""Offscreen rendering class with opengl context."""
def __init__(self, model: "mujoco.MjMujoco", data: "mujoco.MjData"):
width = model.vis.global_.offwidth
height = model.vis.global_.offheight
# We must make GLContext before MjrContext
self._get_opengl_backend(width, height)
super().__init__(model, data, width, height)
self._init_camera()
def _init_camera(self):
self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
self.cam.fixedcamid = -1
for i in range(3):
self.cam.lookat[i] = np.median(self.data.geom_xpos[:, i])
self.cam.distance = self.model.stat.extent
def _get_opengl_backend(self, width: int, height: int):
self.backend = os.environ.get("MUJOCO_GL")
if self.backend is not None:
try:
self.opengl_context = _ALL_RENDERERS[self.backend](width, height)
except KeyError as e:
raise RuntimeError(
"Environment variable {} must be one of {!r}: got {!r}.".format(
"MUJOCO_GL", _ALL_RENDERERS.keys(), self.backend
)
) from e
else:
for name, _ in _ALL_RENDERERS.items():
try:
self.opengl_context = _ALL_RENDERERS[name](width, height)
self.backend = name
break
except: # noqa:E722
pass
if self.backend is None:
raise RuntimeError(
"No OpenGL backend could be imported. Attempting to create a "
"rendering context will result in a RuntimeError."
)
def _set_mujoco_buffer(self):
mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_OFFSCREEN, self.con)
def make_context_current(self):
self.opengl_context.make_current()
def free(self):
self.opengl_context.free()
def __del__(self):
self.free()
def render(
self,
render_mode: str,
camera_id: Optional[int] = None,
segmentation: bool = False,
):
if camera_id is not None:
if camera_id == -1:
self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
else:
self.cam.type = mujoco.mjtCamera.mjCAMERA_FIXED
self.cam.fixedcamid = camera_id
mujoco.mjv_updateScene(
self.model,
self.data,
self.vopt,
self.pert,
self.cam,
mujoco.mjtCatBit.mjCAT_ALL,
self.scn,
)
if segmentation:
self.scn.flags[mujoco.mjtRndFlag.mjRND_SEGMENT] = 1
self.scn.flags[mujoco.mjtRndFlag.mjRND_IDCOLOR] = 1
for marker_params in self._markers:
self._add_marker_to_scene(marker_params)
mujoco.mjr_render(self.viewport, self.scn, self.con)
for gridpos, (text1, text2) in self._overlays.items():
mujoco.mjr_overlay(
mujoco.mjtFontScale.mjFONTSCALE_150,
gridpos,
self.viewport,
text1.encode(),
text2.encode(),
self.con,
)
if segmentation:
self.scn.flags[mujoco.mjtRndFlag.mjRND_SEGMENT] = 0
self.scn.flags[mujoco.mjtRndFlag.mjRND_IDCOLOR] = 0
rgb_arr = np.zeros(
3 * self.viewport.width * self.viewport.height, dtype=np.uint8
)
depth_arr = np.zeros(
self.viewport.width * self.viewport.height, dtype=np.float32
)
mujoco.mjr_readPixels(rgb_arr, depth_arr, self.viewport, self.con)
if render_mode == "depth_array":
depth_img = depth_arr.reshape(self.viewport.height, self.viewport.width)
# original image is upside-down, so flip it
return depth_img[::-1, :]
else:
rgb_img = rgb_arr.reshape(self.viewport.height, self.viewport.width, 3)
if segmentation:
seg_img = (
rgb_img[:, :, 0]
+ rgb_img[:, :, 1] * (2**8)
+ rgb_img[:, :, 2] * (2**16)
)
seg_img[seg_img >= (self.scn.ngeom + 1)] = 0
seg_ids = np.full(
(self.scn.ngeom + 1, 2), fill_value=-1, dtype=np.int32
)
for i in range(self.scn.ngeom):
geom = self.scn.geoms[i]
if geom.segid != -1:
seg_ids[geom.segid + 1, 0] = geom.objtype
seg_ids[geom.segid + 1, 1] = geom.objid
rgb_img = seg_ids[seg_img]
# original image is upside-down, so flip i
return rgb_img[::-1, :, :]
def close(self):
self.free()
glfw.terminate()
class WindowViewer(BaseRender):
"""Class for window rendering in all MuJoCo environments."""
def __init__(self, model: "mujoco.MjModel", data: "mujoco.MjData"):
glfw.init()
self._button_left_pressed = False
self._button_right_pressed = False
self._last_mouse_x = 0
self._last_mouse_y = 0
self._paused = False
self._transparent = False
self._contacts = False
self._render_every_frame = True
self._image_idx = 0
self._image_path = "/tmp/frame_%07d.png"
self._time_per_render = 1 / 60.0
self._run_speed = 1.0
self._loop_count = 0
self._advance_by_one_step = False
self._hide_menu = False
width, height = glfw.get_video_mode(glfw.get_primary_monitor()).size
glfw.window_hint(glfw.VISIBLE, 1)
self.window = glfw.create_window(width // 2, height // 2, "mujoco", None, None)
self.width, self.height = glfw.get_framebuffer_size(self.window)
window_width, _ = glfw.get_window_size(self.window)
self._scale = self.width * 1.0 / window_width
# set callbacks
glfw.set_cursor_pos_callback(self.window, self._cursor_pos_callback)
glfw.set_mouse_button_callback(self.window, self._mouse_button_callback)
glfw.set_scroll_callback(self.window, self._scroll_callback)
glfw.set_key_callback(self.window, self._key_callback)
super().__init__(model, data, width, height)
glfw.swap_interval(1)
def _set_mujoco_buffer(self):
mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_WINDOW, self.con)
def make_context_current(self):
glfw.make_context_current(self.window)
def free(self):
if self.window:
if glfw.get_current_context() == self.window:
glfw.make_context_current(None)
glfw.destroy_window(self.window)
self.window = None
def __del__(self):
"""Eliminate all of the OpenGL glfw contexts and windows"""
self.free()
def render(self):
"""
Renders the environment geometries in the OpenGL glfw window:
1. Create the overlay for the left side panel menu.
2. Update the geometries used for rendering based on the current state of the model - `mujoco.mjv_updateScene()`.
3. Add markers to scene, these are additional geometries to include in the model, i.e arrows, https://mujoco.readthedocs.io/en/latest/APIreference.html?highlight=arrow#mjtgeom.
These markers are added with the `add_marker()` method before rendering.
4. Render the 3D scene to the window context - `mujoco.mjr_render()`.
5. Render overlays in the window context - `mujoco.mjr_overlay()`.
6. Swap front and back buffer, https://www.glfw.org/docs/3.3/quick.html.
7. Poll events like mouse clicks or keyboard input.
"""
# mjv_updateScene, mjr_render, mjr_overlay
def update():
# fill overlay items
self._create_overlay()
render_start = time.time()
if self.window is None:
return
elif glfw.window_should_close(self.window):
glfw.destroy_window(self.window)
glfw.terminate()
self.viewport.width, self.viewport.height = glfw.get_framebuffer_size(
self.window
)
# update scene
mujoco.mjv_updateScene(
self.model,
self.data,
self.vopt,
mujoco.MjvPerturb(),
self.cam,
mujoco.mjtCatBit.mjCAT_ALL.value,
self.scn,
)
# marker items
for marker in self._markers:
self._add_marker_to_scene(marker)
# render
mujoco.mjr_render(self.viewport, self.scn, self.con)
# overlay items
if not self._hide_menu:
for gridpos, [t1, t2] in self._overlays.items():
mujoco.mjr_overlay(
mujoco.mjtFontScale.mjFONTSCALE_150,
gridpos,
self.viewport,
t1,
t2,
self.con,
)
glfw.swap_buffers(self.window)
glfw.poll_events()
self._time_per_render = 0.9 * self._time_per_render + 0.1 * (
time.time() - render_start
)
if self._paused:
while self._paused:
update()
if self._advance_by_one_step:
self._advance_by_one_step = False
break
else:
self._loop_count += self.model.opt.timestep / (
self._time_per_render * self._run_speed
)
if self._render_every_frame:
self._loop_count = 1
while self._loop_count > 0:
update()
self._loop_count -= 1
# clear overlay
self._overlays.clear()
# clear markers
self._markers.clear()
def close(self):
self.free()
glfw.terminate()
def _key_callback(self, window, key: int, scancode, action: int, mods):
if action != glfw.RELEASE:
return
# Switch cameras
elif key == glfw.KEY_TAB:
self.cam.fixedcamid += 1
self.cam.type = mujoco.mjtCamera.mjCAMERA_FIXED
if self.cam.fixedcamid >= self.model.ncam:
self.cam.fixedcamid = -1
self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
# Pause simulation
elif key == glfw.KEY_SPACE and self._paused is not None:
self._paused = not self._paused
# Advances simulation by one step.
elif key == glfw.KEY_RIGHT and self._paused is not None:
self._advance_by_one_step = True
self._paused = True
# Slows down simulation
elif key == glfw.KEY_S:
self._run_speed /= 2.0
# Speeds up simulation
elif key == glfw.KEY_F:
self._run_speed *= 2.0
# Turn off / turn on rendering every frame.
elif key == glfw.KEY_D:
self._render_every_frame = not self._render_every_frame
# Capture screenshot
elif key == glfw.KEY_T:
img = np.zeros(
(
glfw.get_framebuffer_size(self.window)[1],
glfw.get_framebuffer_size(self.window)[0],
3,
),
dtype=np.uint8,
)
mujoco.mjr_readPixels(img, None, self.viewport, self.con)
imageio.imwrite(self._image_path % self._image_idx, np.flipud(img))
self._image_idx += 1
# Display contact forces
elif key == glfw.KEY_C:
self._contacts = not self._contacts
self.vopt.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = self._contacts
self.vopt.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = self._contacts
# Display coordinate frames
elif key == glfw.KEY_E:
self.vopt.frame = 1 - self.vopt.frame
# Hide overlay menu
elif key == glfw.KEY_H:
self._hide_menu = not self._hide_menu
# Make transparent
elif key == glfw.KEY_R:
self._transparent = not self._transparent
if self._transparent:
self.model.geom_rgba[:, 3] /= 5.0
else:
self.model.geom_rgba[:, 3] *= 5.0
# Geom group visibility
elif key in (glfw.KEY_0, glfw.KEY_1, glfw.KEY_2, glfw.KEY_3, glfw.KEY_4):
self.vopt.geomgroup[key - glfw.KEY_0] ^= 1
# Quit
if key == glfw.KEY_ESCAPE:
print("Pressed ESC")
print("Quitting.")
glfw.destroy_window(self.window)
glfw.terminate()
def _cursor_pos_callback(
self, window: "glfw.LP__GLFWwindow", xpos: float, ypos: float
):
if not (self._button_left_pressed or self._button_right_pressed):
return
mod_shift = (
glfw.get_key(window, glfw.KEY_LEFT_SHIFT) == glfw.PRESS
or glfw.get_key(window, glfw.KEY_RIGHT_SHIFT) == glfw.PRESS
)
if self._button_right_pressed:
action = (
mujoco.mjtMouse.mjMOUSE_MOVE_H
if mod_shift
else mujoco.mjtMouse.mjMOUSE_MOVE_V
)
elif self._button_left_pressed:
action = (
mujoco.mjtMouse.mjMOUSE_ROTATE_H
if mod_shift
else mujoco.mjtMouse.mjMOUSE_ROTATE_V
)
else:
action = mujoco.mjtMouse.mjMOUSE_ZOOM
dx = int(self._scale * xpos) - self._last_mouse_x
dy = int(self._scale * ypos) - self._last_mouse_y
width, height = glfw.get_framebuffer_size(window)
mujoco.mjv_moveCamera(
self.model, action, dx / height, dy / height, self.scn, self.cam
)
self._last_mouse_x = int(self._scale * xpos)
self._last_mouse_y = int(self._scale * ypos)
def _mouse_button_callback(self, window: "glfw.LP__GLFWwindow", button, act, mods):
self._button_left_pressed = (
glfw.get_mouse_button(window, glfw.MOUSE_BUTTON_LEFT) == glfw.PRESS
)
self._button_right_pressed = (
glfw.get_mouse_button(window, glfw.MOUSE_BUTTON_RIGHT) == glfw.PRESS
)
x, y = glfw.get_cursor_pos(window)
self._last_mouse_x = int(self._scale * x)
self._last_mouse_y = int(self._scale * y)
def _scroll_callback(self, window, x_offset, y_offset: float):
mujoco.mjv_moveCamera(
self.model,
mujoco.mjtMouse.mjMOUSE_ZOOM,
0,
-0.05 * y_offset,
self.scn,
self.cam,
)
def _create_overlay(self):
topleft = mujoco.mjtGridPos.mjGRID_TOPLEFT
bottomleft = mujoco.mjtGridPos.mjGRID_BOTTOMLEFT
if self._render_every_frame:
self.add_overlay(topleft, "", "")
else:
self.add_overlay(
topleft,
"Run speed = %.3f x real time" % self._run_speed,
"[S]lower, [F]aster",
)
self.add_overlay(
topleft, "Ren[d]er every frame", "On" if self._render_every_frame else "Off"
)
self.add_overlay(
topleft,
"Switch camera (#cams = %d)" % (self.model.ncam + 1),
"[Tab] (camera ID = %d)" % self.cam.fixedcamid,
)
self.add_overlay(topleft, "[C]ontact forces", "On" if self._contacts else "Off")
self.add_overlay(topleft, "T[r]ansparent", "On" if self._transparent else "Off")
if self._paused is not None:
if not self._paused:
self.add_overlay(topleft, "Stop", "[Space]")
else:
self.add_overlay(topleft, "Start", "[Space]")
self.add_overlay(
topleft, "Advance simulation by one step", "[right arrow]"
)
self.add_overlay(
topleft, "Referenc[e] frames", "On" if self.vopt.frame == 1 else "Off"
)
self.add_overlay(topleft, "[H]ide Menu", "")
if self._image_idx > 0:
fname = self._image_path % (self._image_idx - 1)
self.add_overlay(topleft, "Cap[t]ure frame", "Saved as %s" % fname)
else:
self.add_overlay(topleft, "Cap[t]ure frame", "")
self.add_overlay(topleft, "Toggle geomgroup visibility", "0-4")
self.add_overlay(bottomleft, "FPS", "%d%s" % (1 / self._time_per_render, ""))
self.add_overlay(
bottomleft, "Solver iterations", str(self.data.solver_iter + 1)
)
self.add_overlay(
bottomleft, "Step", str(round(self.data.time / self.model.opt.timestep))
)
self.add_overlay(bottomleft, "timestep", "%.5f" % self.model.opt.timestep)
class MujocoRenderer:
"""This is the MuJoCo renderer manager class for every MuJoCo environment.
The class has two main public methods available:
- :meth:`render` - Renders the environment in three possible modes: "human", "rgb_array", or "depth_array"
- :meth:`close` - Closes all contexts initialized with the renderer
"""
def __init__(
self,
model: "mujoco.MjModel",
data: "mujoco.MjData",
default_cam_config: Optional[dict] = None,
):
"""A wrapper for clipping continuous actions within the valid bound.
Args:
model: MjModel data structure of the MuJoCo simulation
data: MjData data structure of the MuJoCo simulation
default_cam_config: dictionary with attribute values of the viewer's default camera, https://mujoco.readthedocs.io/en/latest/XMLreference.html?highlight=camera#visual-global
"""
self.model = model
self.data = data
self._viewers = {}
self.viewer = None
self.default_cam_config = default_cam_config
def render(
self,
render_mode: str,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
):
"""Renders a frame of the simulation in a specific format and camera view.
Args:
render_mode: The format to render the frame, it can be: "human", "rgb_array", or "depth_array"
camera_id: The integer camera id from which to render the frame in the MuJoCo simulation
camera_name: The string name of the camera from which to render the frame in the MuJoCo simulation. This argument should not be passed if using cameara_id instead and vice versa
Returns:
If render_mode is "rgb_array" or "depth_arra" it returns a numpy array in the specified format. "human" render mode does not return anything.
"""
viewer = self._get_viewer(render_mode=render_mode)
if render_mode in {
"rgb_array",
"depth_array",
}:
if camera_id is not None and camera_name is not None:
raise ValueError(
"Both `camera_id` and `camera_name` cannot be"
" specified at the same time."
)
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
camera_name = "track"
if camera_id is None:
camera_id = mujoco.mj_name2id(
self.model,
mujoco.mjtObj.mjOBJ_CAMERA,
camera_name,
)
img = viewer.render(render_mode=render_mode, camera_id=camera_id)
return img
elif render_mode == "human":
return viewer.render()
def _get_viewer(self, render_mode: str):
"""Initializes and returns a viewer class depending on the render_mode
- `WindowViewer` class for "human" render mode
- `OffScreenViewer` class for "rgb_array" or "depth_array" render mode
"""
self.viewer = self._viewers.get(render_mode)
if self.viewer is None:
if render_mode == "human":
self.viewer = WindowViewer(self.model, self.data)
elif render_mode in {"rgb_array", "depth_array"}:
self.viewer = OffScreenViewer(self.model, self.data)
else:
raise AttributeError(
f"Unexpected mode: {render_mode}, expected modes: human, rgb_array, or depth_array"
)
# Add default camera parameters
self._set_cam_config()
self._viewers[render_mode] = self.viewer
if len(self._viewers.keys()) > 1:
# Only one context can be current at a time
self.viewer.make_context_current()
return self.viewer
def _set_cam_config(self):
"""Set the default camera parameters"""
assert self.viewer is not None
if self.default_cam_config is not None:
for key, value in self.default_cam_config.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)
def close(self):
"""Close the OpenGL rendering contexts of all viewer modes"""
for _, viewer in self._viewers.items():
viewer.close()

View File

@ -0,0 +1,84 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class PusherEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "pusher.xml", 5, observation_space=observation_space, **kwargs
)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
reward_near = -np.linalg.norm(vec_1)
reward_dist = -np.linalg.norm(vec_2)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
return (
ob,
reward,
False,
False,
dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
)
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = -1
self.viewer.cam.distance = 4.0
def reset_model(self):
qpos = self.init_qpos
self.goal_pos = np.asarray([0, 0])
while True:
self.cylinder_pos = np.concatenate(
[
self.np_random.uniform(low=-0.3, high=0, size=1),
self.np_random.uniform(low=-0.2, high=0.2, size=1),
]
)
if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
break
qpos[-4:-2] = self.cylinder_pos
qpos[-2:] = self.goal_pos
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-4:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
]
)

View File

@ -0,0 +1,217 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": -1,
"distance": 4.0,
}
class PusherEnv(MujocoEnv, utils.EzPickle):
"""
## Description
"Pusher" is a multi-jointed robot arm which is very similar to that of a human.
The goal is to move a target cylinder (called *object*) to a goal position using the robot's end effector (called *fingertip*).
The robot consists of shoulder, elbow, forearm, and wrist joints.
## Action Space
The action space is a `Box(-2, 2, (7,), float32)`. An action `(a, b)` represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|--------------------------------------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
| 0 | Rotation of the panning the shoulder | -2 | 2 | r_shoulder_pan_joint | hinge | torque (N m) |
| 1 | Rotation of the shoulder lifting joint | -2 | 2 | r_shoulder_lift_joint | hinge | torque (N m) |
| 2 | Rotation of the shoulder rolling joint | -2 | 2 | r_upper_arm_roll_joint | hinge | torque (N m) |
| 3 | Rotation of hinge joint that flexed the elbow | -2 | 2 | r_elbow_flex_joint | hinge | torque (N m) |
| 4 | Rotation of hinge that rolls the forearm | -2 | 2 | r_forearm_roll_joint | hinge | torque (N m) |
| 5 | Rotation of flexing the wrist | -2 | 2 | r_wrist_flex_joint | hinge | torque (N m) |
| 6 | Rotation of rolling the wrist | -2 | 2 | r_wrist_roll_joint | hinge | torque (N m) |
## Observation Space
Observations consist of
- Angle of rotational joints on the pusher
- Angular velocities of rotational joints on the pusher
- The coordinates of the fingertip of the pusher
- The coordinates of the object to be moved
- The coordinates of the goal position
The observation is a `Box(-Inf, Inf, (23,), float64)` where the elements correspond to the table below.
An analogy can be drawn to a human arm in order to help understand the state space, with the words flex and roll meaning the
same as human joints.
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | -------------------------------------------------------- | ---- | --- | -------------------------------- | -------- | ------------------------ |
| 0 | Rotation of the panning the shoulder | -Inf | Inf | r_shoulder_pan_joint | hinge | angle (rad) |
| 1 | Rotation of the shoulder lifting joint | -Inf | Inf | r_shoulder_lift_joint | hinge | angle (rad) |
| 2 | Rotation of the shoulder rolling joint | -Inf | Inf | r_upper_arm_roll_joint | hinge | angle (rad) |
| 3 | Rotation of hinge joint that flexed the elbow | -Inf | Inf | r_elbow_flex_joint | hinge | angle (rad) |
| 4 | Rotation of hinge that rolls the forearm | -Inf | Inf | r_forearm_roll_joint | hinge | angle (rad) |
| 5 | Rotation of flexing the wrist | -Inf | Inf | r_wrist_flex_joint | hinge | angle (rad) |
| 6 | Rotation of rolling the wrist | -Inf | Inf | r_wrist_roll_joint | hinge | angle (rad) |
| 7 | Rotational velocity of the panning the shoulder | -Inf | Inf | r_shoulder_pan_joint | hinge | angular velocity (rad/s) |
| 8 | Rotational velocity of the shoulder lifting joint | -Inf | Inf | r_shoulder_lift_joint | hinge | angular velocity (rad/s) |
| 9 | Rotational velocity of the shoulder rolling joint | -Inf | Inf | r_upper_arm_roll_joint | hinge | angular velocity (rad/s) |
| 10 | Rotational velocity of hinge joint that flexed the elbow | -Inf | Inf | r_elbow_flex_joint | hinge | angular velocity (rad/s) |
| 11 | Rotational velocity of hinge that rolls the forearm | -Inf | Inf | r_forearm_roll_joint | hinge | angular velocity (rad/s) |
| 12 | Rotational velocity of flexing the wrist | -Inf | Inf | r_wrist_flex_joint | hinge | angular velocity (rad/s) |
| 13 | Rotational velocity of rolling the wrist | -Inf | Inf | r_wrist_roll_joint | hinge | angular velocity (rad/s) |
| 14 | x-coordinate of the fingertip of the pusher | -Inf | Inf | tips_arm | slide | position (m) |
| 15 | y-coordinate of the fingertip of the pusher | -Inf | Inf | tips_arm | slide | position (m) |
| 16 | z-coordinate of the fingertip of the pusher | -Inf | Inf | tips_arm | slide | position (m) |
| 17 | x-coordinate of the object to be moved | -Inf | Inf | object (obj_slidex) | slide | position (m) |
| 18 | y-coordinate of the object to be moved | -Inf | Inf | object (obj_slidey) | slide | position (m) |
| 19 | z-coordinate of the object to be moved | -Inf | Inf | object | cylinder | position (m) |
| 20 | x-coordinate of the goal position of the object | -Inf | Inf | goal (goal_slidex) | slide | position (m) |
| 21 | y-coordinate of the goal position of the object | -Inf | Inf | goal (goal_slidey) | slide | position (m) |
| 22 | z-coordinate of the goal position of the object | -Inf | Inf | goal | sphere | position (m) |
## Rewards
The reward consists of two parts:
- *reward_near *: This reward is a measure of how far the *fingertip*
of the pusher (the unattached end) is from the object, with a more negative
value assigned for when the pusher's *fingertip* is further away from the
target. It is calculated as the negative vector norm of (position of
the fingertip - position of target), or *-norm("fingertip" - "target")*.
- *reward_dist *: This reward is a measure of how far the object is from
the target goal position, with a more negative value assigned for object is
further away from the target. It is calculated as the negative vector norm of
(position of the object - position of goal), or *-norm("object" - "target")*.
- *reward_control*: A negative reward for penalising the pusher if
it takes actions that are too large. It is measured as the negative squared
Euclidean norm of the action, i.e. as *- sum(action<sup>2</sup>)*.
The total reward returned is ***reward*** *=* *reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near*
Unlike other environments, Pusher does not allow you to specify weights for the individual reward terms.
However, `info` does contain the keys *reward_dist* and *reward_ctrl*. Thus, if you'd like to weight the terms,
you should create a wrapper that computes the weighted reward from `info`.
## Starting State
All pusher (not including object and goal) states start in
(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0). A uniform noise in the range
[-0.005, 0.005] is added to the velocity attributes only. The velocities of
the object and goal are permanently set to 0. The object's x-position is selected uniformly
between [-0.3, 0] while the y-position is selected uniformly between [-0.2, 0.2], and this
process is repeated until the vector norm between the object's (x,y) position and origin is not greater
than 0.17. The goal always have the same position of (0.45, -0.05, -0.323).
The default framerate is 5 with each frame lasting for 0.01, giving rise to a *dt = 5 * 0.01 = 0.05*
## Episode End
The episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 100 timesteps.
2. Termination: Any of the state space values is no longer finite.
## Arguments
No additional arguments are currently supported (in v2 and lower),
but modifications can be made to the XML file in the assets folder
(or by changing the path to a modified XML file in another folder)..
```python
import gymnasium as gym
env = gym.make('Pusher-v4')
```
There is no v3 for Pusher, unlike the robot environments where a v3 and
beyond take `gymnasmium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('Pusher-v2')
```
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64)
MujocoEnv.__init__(
self,
"pusher.xml",
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
reward_near = -np.linalg.norm(vec_1)
reward_dist = -np.linalg.norm(vec_2)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
return (
ob,
reward,
False,
False,
dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
)
def reset_model(self):
qpos = self.init_qpos
self.goal_pos = np.asarray([0, 0])
while True:
self.cylinder_pos = np.concatenate(
[
self.np_random.uniform(low=-0.3, high=0, size=1),
self.np_random.uniform(low=-0.2, high=0.2, size=1),
]
)
if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
break
qpos[-4:-2] = self.cylinder_pos
qpos[-2:] = self.goal_pos
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-4:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate(
[
self.data.qpos.flat[:7],
self.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
]
)

View File

@ -0,0 +1,75 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 50,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "reacher.xml", 2, observation_space=observation_space, **kwargs
)
def step(self, a):
vec = self.get_body_com("fingertip") - self.get_body_com("target")
reward_dist = -np.linalg.norm(vec)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + reward_ctrl
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
return (
ob,
reward,
False,
False,
dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
)
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 0
def reset_model(self):
qpos = (
self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
+ self.init_qpos
)
while True:
self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
if np.linalg.norm(self.goal) < 0.2:
break
qpos[-2:] = self.goal
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-2:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
theta = self.sim.data.qpos.flat[:2]
return np.concatenate(
[
np.cos(theta),
np.sin(theta),
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat[:2],
self.get_body_com("fingertip") - self.get_body_com("target"),
]
)

View File

@ -0,0 +1,190 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {"trackbodyid": 0}
class ReacherEnv(MujocoEnv, utils.EzPickle):
"""
## Description
"Reacher" is a two-jointed robot arm. The goal is to move the robot's end effector (called *fingertip*) close to a
target that is spawned at a random position.
## Action Space
The action space is a `Box(-1, 1, (2,), float32)`. An action `(a, b)` represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|---------------------------------------------------------------------------------|-------------|-------------|--------------------------|-------|------|
| 0 | Torque applied at the first hinge (connecting the link to the point of fixture) | -1 | 1 | joint0 | hinge | torque (N m) |
| 1 | Torque applied at the second hinge (connecting the two links) | -1 | 1 | joint1 | hinge | torque (N m) |
## Observation Space
Observations consist of
- The cosine of the angles of the two arms
- The sine of the angles of the two arms
- The coordinates of the target
- The angular velocities of the arms
- The vector between the target and the reacher's fingertip (3 dimensional with the last element being 0)
The observation is a `Box(-Inf, Inf, (11,), float64)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ---------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
| 0 | cosine of the angle of the first arm | -Inf | Inf | cos(joint0) | hinge | unitless |
| 1 | cosine of the angle of the second arm | -Inf | Inf | cos(joint1) | hinge | unitless |
| 2 | sine of the angle of the first arm | -Inf | Inf | sin(joint0) | hinge | unitless |
| 3 | sine of the angle of the second arm | -Inf | Inf | sin(joint1) | hinge | unitless |
| 4 | x-coordinate of the target | -Inf | Inf | target_x | slide | position (m) |
| 5 | y-coordinate of the target | -Inf | Inf | target_y | slide | position (m) |
| 6 | angular velocity of the first arm | -Inf | Inf | joint0 | hinge | angular velocity (rad/s) |
| 7 | angular velocity of the second arm | -Inf | Inf | joint1 | hinge | angular velocity (rad/s) |
| 8 | x-value of position_fingertip - position_target | -Inf | Inf | NA | slide | position (m) |
| 9 | y-value of position_fingertip - position_target | -Inf | Inf | NA | slide | position (m) |
| 10 | z-value of position_fingertip - position_target (constantly 0 since reacher is 2d and z is same for both) | -Inf | Inf | NA | slide | position (m) |
Most Gym environments just return the positions and velocity of the
joints in the `.xml` file as the state of the environment. However, in
reacher the state is created by combining only certain elements of the
position and velocity, and performing some function transformations on them.
If one is to read the `.xml` for reacher then they will find 4 joints:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
|-----|-----------------------------|----------|----------|----------------------------------|-------|--------------------|
| 0 | angle of the first arm | -Inf | Inf | joint0 | hinge | angle (rad) |
| 1 | angle of the second arm | -Inf | Inf | joint1 | hinge | angle (rad) |
| 2 | x-coordinate of the target | -Inf | Inf | target_x | slide | position (m) |
| 3 | y-coordinate of the target | -Inf | Inf | target_y | slide | position (m) |
## Rewards
The reward consists of two parts:
- *reward_distance*: This reward is a measure of how far the *fingertip*
of the reacher (the unattached end) is from the target, with a more negative
value assigned for when the reacher's *fingertip* is further away from the
target. It is calculated as the negative vector norm of (position of
the fingertip - position of target), or *-norm("fingertip" - "target")*.
- *reward_control*: A negative reward for penalising the walker if
it takes actions that are too large. It is measured as the negative squared
Euclidean norm of the action, i.e. as *- sum(action<sup>2</sup>)*.
The total reward returned is ***reward*** *=* *reward_distance + reward_control*
Unlike other environments, Reacher does not allow you to specify weights for the individual reward terms.
However, `info` does contain the keys *reward_dist* and *reward_ctrl*. Thus, if you'd like to weight the terms,
you should create a wrapper that computes the weighted reward from `info`.
## Starting State
All observations start in state
(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
with a noise added for stochasticity. A uniform noise in the range
[-0.1, 0.1] is added to the positional attributes, while the target position
is selected uniformly at random in a disk of radius 0.2 around the origin.
Independent, uniform noise in the
range of [-0.005, 0.005] is added to the velocities, and the last
element ("fingertip" - "target") is calculated at the end once everything
is set. The default setting has a framerate of 2 and a *dt = 2 * 0.01 = 0.02*
## Episode End
The episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 50 timesteps (with a new random target popping up if the reacher's fingertip reaches it before 50 timesteps)
2. Termination: Any of the state space values is no longer finite.
## Arguments
No additional arguments are currently supported (in v2 and lower),
but modifications can be made to the XML file in the assets folder
(or by changing the path to a modified XML file in another folder)..
```python
import gymnasium as gym
env = gym.make('Reacher-v4')
```
There is no v3 for Reacher, unlike the robot environments where a v3 and
beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 50,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MujocoEnv.__init__(
self,
"reacher.xml",
2,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
def step(self, a):
vec = self.get_body_com("fingertip") - self.get_body_com("target")
reward_dist = -np.linalg.norm(vec)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + reward_ctrl
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
return (
ob,
reward,
False,
False,
dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
)
def reset_model(self):
qpos = (
self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
+ self.init_qpos
)
while True:
self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
if np.linalg.norm(self.goal) < 0.2:
break
qpos[-2:] = self.goal
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-2:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
theta = self.data.qpos.flat[:2]
return np.concatenate(
[
np.cos(theta),
np.sin(theta),
self.data.qpos.flat[2:],
self.data.qvel.flat[:2],
self.get_body_com("fingertip") - self.get_body_com("target"),
]
)

View File

@ -0,0 +1,59 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "swimmer.xml", 4, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
ctrl_cost_coeff = 0.0001
xposbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.sim.data.qpos[0]
reward_fwd = (xposafter - xposbefore) / self.dt
reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
reward = reward_fwd + reward_ctrl
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return (
ob,
reward,
False,
False,
dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl),
)
def _get_obs(self):
qpos = self.sim.data.qpos
qvel = self.sim.data.qvel
return np.concatenate([qpos.flat[2:], qvel.flat])
def reset_model(self):
self.set_state(
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nv),
)
return self._get_obs()

View File

@ -0,0 +1,129 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {}
class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(
self,
xml_file="swimmer.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-4,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 4, observation_space=observation_space, **kwargs
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
def step(self, action):
xy_position_before = self.sim.data.qpos[0:2].copy()
self.do_simulation(action, self.frame_skip)
xy_position_after = self.sim.data.qpos[0:2].copy()
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
forward_reward = self._forward_reward_weight * x_velocity
ctrl_cost = self.control_cost(action)
observation = self._get_obs()
reward = forward_reward - ctrl_cost
info = {
"reward_fwd": forward_reward,
"reward_ctrl": -ctrl_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, False, False, info
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
observation = np.concatenate([position, velocity]).ravel()
return observation
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,232 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
class SwimmerEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment corresponds to the Swimmer environment described in Rémi Coulom's PhD thesis
["Reinforcement Learning Using Neural Networks, with Applications to Motor Control"](https://tel.archives-ouvertes.fr/tel-00003985/document).
The environment aims to increase the number of independent state and control
variables as compared to the classic control environments. The swimmers
consist of three or more segments ('***links***') and one less articulation
joints ('***rotors***') - one rotor joint connecting exactly two links to
form a linear chain. The swimmer is suspended in a two dimensional pool and
always starts in the same position (subject to some deviation drawn from an
uniform distribution), and the goal is to move as fast as possible towards
the right by applying torque on the rotors and using the fluids friction.
## Notes
The problem parameters are:
Problem parameters:
* *n*: number of body parts
* *m<sub>i</sub>*: mass of part *i* (*i* ∈ {1...n})
* *l<sub>i</sub>*: length of part *i* (*i* ∈ {1...n})
* *k*: viscous-friction coefficient
While the default environment has *n* = 3, *l<sub>i</sub>* = 0.1,
and *k* = 0.1. It is possible to pass a custom MuJoCo XML file during construction to increase the
number of links, or to tweak any of the parameters.
## Action Space
The action space is a `Box(-1, 1, (2,), float32)`. An action represents the torques applied between *links*
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
| 0 | Torque applied on the first rotor | -1 | 1 | motor1_rot | hinge | torque (N m) |
| 1 | Torque applied on the second rotor | -1 | 1 | motor2_rot | hinge | torque (N m) |
## Observation Space
By default, observations consists of:
* θ<sub>i</sub>: angle of part *i* with respect to the *x* axis
* θ<sub>i</sub>': its derivative with respect to time (angular velocity)
In the default case, observations do not include the x- and y-coordinates of the front tip. These may
be included by passing `exclude_current_positions_from_observation=False` during construction.
Then, the observation space will be `Box(-Inf, Inf, (10,), float64)` where the first two observations
represent the x- and y-coordinates of the front tip.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
By default, the observation is a `Box(-Inf, Inf, (8,), float64)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
| 0 | angle of the front tip | -Inf | Inf | free_body_rot | hinge | angle (rad) |
| 1 | angle of the first rotor | -Inf | Inf | motor1_rot | hinge | angle (rad) |
| 2 | angle of the second rotor | -Inf | Inf | motor2_rot | hinge | angle (rad) |
| 3 | velocity of the tip along the x-axis | -Inf | Inf | slider1 | slide | velocity (m/s) |
| 4 | velocity of the tip along the y-axis | -Inf | Inf | slider2 | slide | velocity (m/s) |
| 5 | angular velocity of front tip | -Inf | Inf | free_body_rot | hinge | angular velocity (rad/s) |
| 6 | angular velocity of first rotor | -Inf | Inf | motor1_rot | hinge | angular velocity (rad/s) |
| 7 | angular velocity of second rotor | -Inf | Inf | motor2_rot | hinge | angular velocity (rad/s) |
| excluded | position of the tip along the x-axis | -Inf | Inf | slider1 | slide | position (m) |
| excluded | position of the tip along the y-axis | -Inf | Inf | slider2 | slide | position (m) |
## Rewards
The reward consists of two parts:
- *forward_reward*: A reward of moving forward which is measured
as *`forward_reward_weight` * (x-coordinate before action - x-coordinate after action)/dt*. *dt* is
the time between actions and is dependent on the frame_skip parameter
(default is 4), where the frametime is 0.01 - making the
default *dt = 4 * 0.01 = 0.04*. This reward would be positive if the swimmer
swims right as desired.
- *ctrl_cost*: A cost for penalising the swimmer if it takes
actions that are too large. It is measured as *`ctrl_cost_weight` *
sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
control and has a default value of 1e-4
The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
## Starting State
All observations start in state (0,0,0,0,0,0,0,0) with a Uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] is added to the initial state for stochasticity.
## Episode End
The episode truncates when the episode length is greater than 1000.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
gym.make('Swimmer-v4')
```
v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('Swimmer-v4', ctrl_cost_weight=0.1, ....)
```
| Parameter | Type | Default | Description |
| -------------------------------------------- | --------- | --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `xml_file` | **str** | `"swimmer.xml"` | Path to a MuJoCo model |
| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
| `ctrl_cost_weight` | **float** | `1e-4` | Weight for _ctrl_cost_ term (see section on reward) |
| `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(
self,
forward_reward_weight=1.0,
ctrl_cost_weight=1e-4,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
forward_reward_weight,
ctrl_cost_weight,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64
)
MujocoEnv.__init__(
self, "swimmer.xml", 4, observation_space=observation_space, **kwargs
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
def step(self, action):
xy_position_before = self.data.qpos[0:2].copy()
self.do_simulation(action, self.frame_skip)
xy_position_after = self.data.qpos[0:2].copy()
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
forward_reward = self._forward_reward_weight * x_velocity
ctrl_cost = self.control_cost(action)
observation = self._get_obs()
reward = forward_reward - ctrl_cost
info = {
"reward_fwd": forward_reward,
"reward_ctrl": -ctrl_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, False, False, info
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = self.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
observation = np.concatenate([position, velocity]).ravel()
return observation
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation

View File

@ -0,0 +1,61 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "walker2d.xml", 4, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
posbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
terminated = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0)
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def _get_obs(self):
qpos = self.sim.data.qpos
qvel = self.sim.data.qvel
return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
def reset_model(self):
self.set_state(
self.init_qpos
+ self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv),
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 2
self.viewer.cam.distance = self.model.stat.extent * 0.5
self.viewer.cam.lookat[2] = 1.15
self.viewer.cam.elevation = -20

View File

@ -0,0 +1,168 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 2,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(
self,
xml_file="walker2d.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.8, 2.0),
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
healthy_angle_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 4, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def is_healthy(self):
z, angle = self.sim.data.qpos[1:3]
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
is_healthy = healthy_z and healthy_angle
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def step(self, action):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,296 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 2,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class Walker2dEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment builds on the [hopper](https://gymnasium.farama.org/environments/mujoco/hopper/) environment
by adding another set of legs making it possible for the robot to walk forward instead of
hop. Like other Mujoco environments, this environment aims to increase the number of independent state
and control variables as compared to the classic control environments. The walker is a
two-dimensional two-legged figure that consist of seven main body parts - a single torso at the top
(with the two legs splitting after the torso), two thighs in the middle below the torso, two legs
in the bottom below the thighs, and two feet attached to the legs on which the entire body rests.
The goal is to walk in the in the forward (right)
direction by applying torques on the six hinges connecting the seven body parts.
## Action Space
The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|----------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
| 0 | Torque applied on the thigh rotor | -1 | 1 | thigh_joint | hinge | torque (N m) |
| 1 | Torque applied on the leg rotor | -1 | 1 | leg_joint | hinge | torque (N m) |
| 2 | Torque applied on the foot rotor | -1 | 1 | foot_joint | hinge | torque (N m) |
| 3 | Torque applied on the left thigh rotor | -1 | 1 | thigh_left_joint | hinge | torque (N m) |
| 4 | Torque applied on the left leg rotor | -1 | 1 | leg_left_joint | hinge | torque (N m) |
| 5 | Torque applied on the left foot rotor | -1 | 1 | foot_left_joint | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the walker,
followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities.
By default, observations do not include the x-coordinate of the torso. It may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be `Box(-Inf, Inf, (18,), float64)` where the first observation
represent the x-coordinates of the torso of the walker.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
of the torso will be returned in `info` with key `"x_position"`.
By default, observation is a `Box(-Inf, Inf, (17,), float64)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | -------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
| excluded | x-coordinate of the torso | -Inf | Inf | rootx | slide | position (m) |
| 0 | z-coordinate of the torso (height of Walker2d) | -Inf | Inf | rootz | slide | position (m) |
| 1 | angle of the torso | -Inf | Inf | rooty | hinge | angle (rad) |
| 2 | angle of the thigh joint | -Inf | Inf | thigh_joint | hinge | angle (rad) |
| 3 | angle of the leg joint | -Inf | Inf | leg_joint | hinge | angle (rad) |
| 4 | angle of the foot joint | -Inf | Inf | foot_joint | hinge | angle (rad) |
| 5 | angle of the left thigh joint | -Inf | Inf | thigh_left_joint | hinge | angle (rad) |
| 6 | angle of the left leg joint | -Inf | Inf | leg_left_joint | hinge | angle (rad) |
| 7 | angle of the left foot joint | -Inf | Inf | foot_left_joint | hinge | angle (rad) |
| 8 | velocity of the x-coordinate of the torso | -Inf | Inf | rootx | slide | velocity (m/s) |
| 9 | velocity of the z-coordinate (height) of the torso | -Inf | Inf | rootz | slide | velocity (m/s) |
| 10 | angular velocity of the angle of the torso | -Inf | Inf | rooty | hinge | angular velocity (rad/s) |
| 11 | angular velocity of the thigh hinge | -Inf | Inf | thigh_joint | hinge | angular velocity (rad/s) |
| 12 | angular velocity of the leg hinge | -Inf | Inf | leg_joint | hinge | angular velocity (rad/s) |
| 13 | angular velocity of the foot hinge | -Inf | Inf | foot_joint | hinge | angular velocity (rad/s) |
| 14 | angular velocity of the thigh hinge | -Inf | Inf | thigh_left_joint | hinge | angular velocity (rad/s) |
| 15 | angular velocity of the leg hinge | -Inf | Inf | leg_left_joint | hinge | angular velocity (rad/s) |
| 16 | angular velocity of the foot hinge | -Inf | Inf | foot_left_joint | hinge | angular velocity (rad/s) |
## Rewards
The reward consists of three parts:
- *healthy_reward*: Every timestep that the walker is alive, it receives a fixed reward of value `healthy_reward`,
- *forward_reward*: A reward of walking forward which is measured as
*`forward_reward_weight` * (x-coordinate before action - x-coordinate after action)/dt*.
*dt* is the time between actions and is dependeent on the frame_skip parameter
(default is 4), where the frametime is 0.002 - making the default
*dt = 4 * 0.002 = 0.008*. This reward would be positive if the walker walks forward (positive x direction).
- *ctrl_cost*: A cost for penalising the walker if it
takes actions that are too large. It is measured as
*`ctrl_cost_weight` * sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is
a parameter set for the control and has a default value of 0.001
The total reward returned is ***reward*** *=* *healthy_reward bonus + forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
## Starting State
All observations start in state
(0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity.
## Episode End
The walker is said to be unhealthy if any of the following happens:
1. Any of the state space values is no longer finite
2. The height of the walker is ***not*** in the closed interval specified by `healthy_z_range`
3. The absolute value of the angle (`observation[1]` if `exclude_current_positions_from_observation=False`, else `observation[2]`) is ***not*** in the closed interval specified by `healthy_angle_range`
If `terminate_when_unhealthy=True` is passed during construction (which is the default),
the episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 1000 timesteps
2. Termination: The walker is unhealthy
If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
env = gym.make('Walker2d-v4')
```
v3 and beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('Walker2d-v4', ctrl_cost_weight=0.1, ....)
```
| Parameter | Type | Default | Description |
| -------------------------------------------- | --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `xml_file` | **str** | `"walker2d.xml"` | Path to a MuJoCo model |
| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
| `ctrl_cost_weight` | **float** | `1e-3` | Weight for _ctr_cost_ term (see section on reward) |
| `healthy_reward` | **float** | `1.0` | Constant reward given if the ant is "healthy" after timestep |
| `terminate_when_unhealthy` | **bool** | `True` | If true, issue a done signal if the z-coordinate of the walker is no longer healthy |
| `healthy_z_range` | **tuple** | `(0.8, 2)` | The z-coordinate of the torso of the walker must be in this range to be considered healthy |
| `healthy_angle_range` | **tuple** | `(-1, 1)` | The angle must be in this range to be considered healthy |
| `reset_noise_scale` | **float** | `5e-3` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(
self,
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.8, 2.0),
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
healthy_angle_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
)
MujocoEnv.__init__(
self,
"walker2d.xml",
4,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def is_healthy(self):
z, angle = self.data.qpos[1:3]
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
is_healthy = healthy_z and healthy_angle
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = np.clip(self.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def step(self, action):
x_position_before = self.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation