I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,77 @@
"""Root `__init__` of the gymnasium module setting the `__all__` of gymnasium modules."""
# isort: skip_file
from gymnasium.core import (
Env,
Wrapper,
ObservationWrapper,
ActionWrapper,
RewardWrapper,
)
from gymnasium.spaces.space import Space
from gymnasium.envs.registration import (
make,
spec,
register,
registry,
pprint_registry,
make_vec,
register_envs,
)
# necessary for `envs.__init__` which registers all gymnasium environments and loads plugins
from gymnasium import envs
from gymnasium import spaces, utils, vector, wrappers, error, logger
from gymnasium import experimental
__all__ = [
# core classes
"Env",
"Wrapper",
"ObservationWrapper",
"ActionWrapper",
"RewardWrapper",
"Space",
# registration
"make",
"make_vec",
"spec",
"register",
"registry",
"pprint_registry",
"register_envs",
# module folders
"envs",
"experimental",
"spaces",
"utils",
"vector",
"wrappers",
"error",
"logger",
]
__version__ = "0.29.1"
# Initializing pygame initializes audio connections through SDL. SDL uses alsa by default on all Linux systems
# SDL connecting to alsa frequently create these giant lists of warnings every time you import an environment using
# pygame
# DSP is far more benign (and should probably be the default in SDL anyways)
import os
import sys
if sys.platform.startswith("linux"):
os.environ["SDL_AUDIODRIVER"] = "dsp"
os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "hide"
try:
from farama_notifications import notifications
if "gymnasium" in notifications and __version__ in notifications["gymnasium"]:
print(notifications["gymnasium"][__version__], file=sys.stderr)
except Exception: # nosec
pass

View File

@ -0,0 +1,602 @@
"""Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper."""
from __future__ import annotations
from copy import deepcopy
from typing import TYPE_CHECKING, Any, Generic, SupportsFloat, TypeVar
import numpy as np
from gymnasium import logger, spaces
from gymnasium.utils import RecordConstructorArgs, seeding
if TYPE_CHECKING:
from gymnasium.envs.registration import EnvSpec, WrapperSpec
__all__ = [
"Env",
"Wrapper",
"ObservationWrapper",
"RewardWrapper",
"ActionWrapper",
"ObsType",
"ActType",
"RenderFrame",
"WrapperObsType",
"WrapperActType",
]
ObsType = TypeVar("ObsType")
ActType = TypeVar("ActType")
RenderFrame = TypeVar("RenderFrame")
class Env(Generic[ObsType, ActType]):
r"""The main Gymnasium class for implementing Reinforcement Learning Agents environments.
The class encapsulates an environment with arbitrary behind-the-scenes dynamics through the :meth:`step` and :meth:`reset` functions.
An environment can be partially or fully observed by single agents. For multi-agent environments, see PettingZoo.
The main API methods that users of this class need to know are:
- :meth:`step` - Updates an environment with actions returning the next agent observation, the reward for taking that actions,
if the environment has terminated or truncated due to the latest action and information from the environment about the step, i.e. metrics, debug info.
- :meth:`reset` - Resets the environment to an initial state, required before calling step.
Returns the first agent observation for an episode and information, i.e. metrics, debug info.
- :meth:`render` - Renders the environments to help visualise what the agent see, examples modes are "human", "rgb_array", "ansi" for text.
- :meth:`close` - Closes the environment, important when external software is used, i.e. pygame for rendering, databases
Environments have additional attributes for users to understand the implementation
- :attr:`action_space` - The Space object corresponding to valid actions, all valid actions should be contained within the space.
- :attr:`observation_space` - The Space object corresponding to valid observations, all valid observations should be contained within the space.
- :attr:`reward_range` - A tuple corresponding to the minimum and maximum possible rewards for an agent over an episode.
The default reward range is set to :math:`(-\infty,+\infty)`.
- :attr:`spec` - An environment spec that contains the information used to initialize the environment from :meth:`gymnasium.make`
- :attr:`metadata` - The metadata of the environment, i.e. render modes, render fps
- :attr:`np_random` - The random number generator for the environment. This is automatically assigned during
``super().reset(seed=seed)`` and when assessing ``self.np_random``.
.. seealso:: For modifying or extending environments use the :py:class:`gymnasium.Wrapper` class
Note:
To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``.
"""
# Set this in SOME subclasses
metadata: dict[str, Any] = {"render_modes": []}
# define render_mode if your environment supports rendering
render_mode: str | None = None
reward_range = (-float("inf"), float("inf"))
spec: EnvSpec | None = None
# Set these in ALL subclasses
action_space: spaces.Space[ActType]
observation_space: spaces.Space[ObsType]
# Created
_np_random: np.random.Generator | None = None
def step(
self, action: ActType
) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
"""Run one timestep of the environment's dynamics using the agent actions.
When the end of an episode is reached (``terminated or truncated``), it is necessary to call :meth:`reset` to
reset this environment's state for the next episode.
.. versionchanged:: 0.26
The Step API was changed removing ``done`` in favor of ``terminated`` and ``truncated`` to make it clearer
to users when the environment had terminated or truncated which is critical for reinforcement learning
bootstrapping algorithms.
Args:
action (ActType): an action provided by the agent to update the environment state.
Returns:
observation (ObsType): An element of the environment's :attr:`observation_space` as the next observation due to the agent actions.
An example is a numpy array containing the positions and velocities of the pole in CartPole.
reward (SupportsFloat): The reward as a result of taking the action.
terminated (bool): Whether the agent reaches the terminal state (as defined under the MDP of the task)
which can be positive or negative. An example is reaching the goal state or moving into the lava from
the Sutton and Barton, Gridworld. If true, the user needs to call :meth:`reset`.
truncated (bool): Whether the truncation condition outside the scope of the MDP is satisfied.
Typically, this is a timelimit, but could also be used to indicate an agent physically going out of bounds.
Can be used to end the episode prematurely before a terminal state is reached.
If true, the user needs to call :meth:`reset`.
info (dict): Contains auxiliary diagnostic information (helpful for debugging, learning, and logging).
This might, for instance, contain: metrics that describe the agent's performance state, variables that are
hidden from observations, or individual reward terms that are combined to produce the total reward.
In OpenAI Gym <v26, it contains "TimeLimit.truncated" to distinguish truncation and termination,
however this is deprecated in favour of returning terminated and truncated variables.
done (bool): (Deprecated) A boolean value for if the episode has ended, in which case further :meth:`step` calls will
return undefined results. This was removed in OpenAI Gym v26 in favor of terminated and truncated attributes.
A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully,
a certain timelimit was exceeded, or the physics simulation has entered an invalid state.
"""
raise NotImplementedError
def reset(
self,
*,
seed: int | None = None,
options: dict[str, Any] | None = None,
) -> tuple[ObsType, dict[str, Any]]: # type: ignore
"""Resets the environment to an initial internal state, returning an initial observation and info.
This method generates a new starting state often with some randomness to ensure that the agent explores the
state space and learns a generalised policy about the environment. This randomness can be controlled
with the ``seed`` parameter otherwise if the environment already has a random number generator and
:meth:`reset` is called with ``seed=None``, the RNG is not reset.
Therefore, :meth:`reset` should (in the typical use case) be called with a seed right after initialization and then never again.
For Custom environments, the first line of :meth:`reset` should be ``super().reset(seed=seed)`` which implements
the seeding correctly.
.. versionchanged:: v0.25
The ``return_info`` parameter was removed and now info is expected to be returned.
Args:
seed (optional int): The seed that is used to initialize the environment's PRNG (`np_random`).
If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed,
a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom).
However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset.
If you pass an integer, the PRNG will be reset even if it already exists.
Usually, you want to pass an integer *right after the environment has been initialized and then never again*.
Please refer to the minimal example above to see this paradigm in action.
options (optional dict): Additional information to specify how the environment is reset (optional,
depending on the specific environment)
Returns:
observation (ObsType): Observation of the initial state. This will be an element of :attr:`observation_space`
(typically a numpy array) and is analogous to the observation returned by :meth:`step`.
info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to
the ``info`` returned by :meth:`step`.
"""
# Initialize the RNG if the seed is manually passed
if seed is not None:
self._np_random, seed = seeding.np_random(seed)
def render(self) -> RenderFrame | list[RenderFrame] | None:
"""Compute the render frames as specified by :attr:`render_mode` during the initialization of the environment.
The environment's :attr:`metadata` render modes (`env.metadata["render_modes"]`) should contain the possible
ways to implement the render modes. In addition, list versions for most render modes is achieved through
`gymnasium.make` which automatically applies a wrapper to collect rendered frames.
Note:
As the :attr:`render_mode` is known during ``__init__``, the objects used to render the environment state
should be initialised in ``__init__``.
By convention, if the :attr:`render_mode` is:
- None (default): no render is computed.
- "human": The environment is continuously rendered in the current display or terminal, usually for human consumption.
This rendering should occur during :meth:`step` and :meth:`render` doesn't need to be called. Returns ``None``.
- "rgb_array": Return a single frame representing the current state of the environment.
A frame is a ``np.ndarray`` with shape ``(x, y, 3)`` representing RGB values for an x-by-y pixel image.
- "ansi": Return a strings (``str``) or ``StringIO.StringIO`` containing a terminal-style text representation
for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors).
- "rgb_array_list" and "ansi_list": List based version of render modes are possible (except Human) through the
wrapper, :py:class:`gymnasium.wrappers.RenderCollection` that is automatically applied during ``gymnasium.make(..., render_mode="rgb_array_list")``.
The frames collected are popped after :meth:`render` is called or :meth:`reset`.
Note:
Make sure that your class's :attr:`metadata` ``"render_modes"`` key includes the list of supported modes.
.. versionchanged:: 0.25.0
The render function was changed to no longer accept parameters, rather these parameters should be specified
in the environment initialised, i.e., ``gymnasium.make("CartPole-v1", render_mode="human")``
"""
raise NotImplementedError
def close(self):
"""After the user has finished using the environment, close contains the code necessary to "clean up" the environment.
This is critical for closing rendering windows, database or HTTP connections.
Calling ``close`` on an already closed environment has no effect and won't raise an error.
"""
pass
@property
def unwrapped(self) -> Env[ObsType, ActType]:
"""Returns the base non-wrapped environment.
Returns:
Env: The base non-wrapped :class:`gymnasium.Env` instance
"""
return self
@property
def np_random(self) -> np.random.Generator:
"""Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed.
Returns:
Instances of `np.random.Generator`
"""
if self._np_random is None:
self._np_random, _ = seeding.np_random()
return self._np_random
@np_random.setter
def np_random(self, value: np.random.Generator):
self._np_random = value
def __str__(self):
"""Returns a string of the environment with :attr:`spec` id's if :attr:`spec.
Returns:
A string identifying the environment
"""
if self.spec is None:
return f"<{type(self).__name__} instance>"
else:
return f"<{type(self).__name__}<{self.spec.id}>>"
def __enter__(self):
"""Support with-statement for the environment."""
return self
def __exit__(self, *args: Any):
"""Support with-statement for the environment and closes the environment."""
self.close()
# propagate exception
return False
def get_wrapper_attr(self, name: str) -> Any:
"""Gets the attribute `name` from the environment."""
return getattr(self, name)
WrapperObsType = TypeVar("WrapperObsType")
WrapperActType = TypeVar("WrapperActType")
class Wrapper(
Env[WrapperObsType, WrapperActType],
Generic[WrapperObsType, WrapperActType, ObsType, ActType],
):
"""Wraps a :class:`gymnasium.Env` to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.
This class is the base class of all wrappers to change the behavior of the underlying environment.
Wrappers that inherit from this class can modify the :attr:`action_space`, :attr:`observation_space`,
:attr:`reward_range` and :attr:`metadata` attributes, without changing the underlying environment's attributes.
Moreover, the behavior of the :meth:`step` and :meth:`reset` methods can be changed by these wrappers.
Some attributes (:attr:`spec`, :attr:`render_mode`, :attr:`np_random`) will point back to the wrapper's environment
(i.e. to the corresponding attributes of :attr:`env`).
Note:
If you inherit from :class:`Wrapper`, don't forget to call ``super().__init__(env)``
"""
def __init__(self, env: Env[ObsType, ActType]):
"""Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.
Args:
env: The environment to wrap
"""
self.env = env
self._action_space: spaces.Space[WrapperActType] | None = None
self._observation_space: spaces.Space[WrapperObsType] | None = None
self._reward_range: tuple[SupportsFloat, SupportsFloat] | None = None
self._metadata: dict[str, Any] | None = None
self._cached_spec: EnvSpec | None = None
def __getattr__(self, name: str) -> Any:
"""Returns an attribute with ``name``, unless ``name`` starts with an underscore.
Args:
name: The variable name
Returns:
The value of the variable in the wrapper stack
Warnings:
This feature is deprecated and removed in v1.0 and replaced with `env.get_attr(name})`
"""
if name == "_np_random":
raise AttributeError(
"Can't access `_np_random` of a wrapper, use `self.unwrapped._np_random` or `self.np_random`."
)
elif name.startswith("_"):
raise AttributeError(f"accessing private attribute '{name}' is prohibited")
logger.warn(
f"env.{name} to get variables from other wrappers is deprecated and will be removed in v1.0, "
f"to get this variable you can do `env.unwrapped.{name}` for environment variables or `env.get_wrapper_attr('{name}')` that will search the reminding wrappers."
)
return getattr(self.env, name)
def get_wrapper_attr(self, name: str) -> Any:
"""Gets an attribute from the wrapper and lower environments if `name` doesn't exist in this object.
Args:
name: The variable name to get
Returns:
The variable with name in wrapper or lower environments
"""
if name in self.__dir__(): # todo change in v1.0.0 to `hasattr`
return getattr(self, name)
else:
try:
return self.env.get_wrapper_attr(name)
except AttributeError as e:
raise AttributeError(
f"wrapper {self.class_name()} has no attribute {name!r}"
) from e
@property
def spec(self) -> EnvSpec | None:
"""Returns the :attr:`Env` :attr:`spec` attribute with the `WrapperSpec` if the wrapper inherits from `EzPickle`."""
if self._cached_spec is not None:
return self._cached_spec
env_spec = self.env.spec
if env_spec is not None:
# See if the wrapper inherits from `RecordConstructorArgs` then add the kwargs otherwise use `None` for the wrapper kwargs. This will raise an error in `make`
if isinstance(self, RecordConstructorArgs):
kwargs = getattr(self, "_saved_kwargs")
if "env" in kwargs:
kwargs = deepcopy(kwargs)
kwargs.pop("env")
else:
kwargs = None
from gymnasium.envs.registration import WrapperSpec
wrapper_spec = WrapperSpec(
name=self.class_name(),
entry_point=f"{self.__module__}:{type(self).__name__}",
kwargs=kwargs,
)
# to avoid reference issues we deepcopy the prior environments spec and add the new information
env_spec = deepcopy(env_spec)
env_spec.additional_wrappers += (wrapper_spec,)
self._cached_spec = env_spec
return env_spec
@classmethod
def wrapper_spec(cls, **kwargs: Any) -> WrapperSpec:
"""Generates a `WrapperSpec` for the wrappers."""
from gymnasium.envs.registration import WrapperSpec
return WrapperSpec(
name=cls.class_name(),
entry_point=f"{cls.__module__}:{cls.__name__}",
kwargs=kwargs,
)
@classmethod
def class_name(cls) -> str:
"""Returns the class name of the wrapper."""
return cls.__name__
@property
def action_space(
self,
) -> spaces.Space[ActType] | spaces.Space[WrapperActType]:
"""Return the :attr:`Env` :attr:`action_space` unless overwritten then the wrapper :attr:`action_space` is used."""
if self._action_space is None:
return self.env.action_space
return self._action_space
@action_space.setter
def action_space(self, space: spaces.Space[WrapperActType]):
self._action_space = space
@property
def observation_space(
self,
) -> spaces.Space[ObsType] | spaces.Space[WrapperObsType]:
"""Return the :attr:`Env` :attr:`observation_space` unless overwritten then the wrapper :attr:`observation_space` is used."""
if self._observation_space is None:
return self.env.observation_space
return self._observation_space
@observation_space.setter
def observation_space(self, space: spaces.Space[WrapperObsType]):
self._observation_space = space
@property
def reward_range(self) -> tuple[SupportsFloat, SupportsFloat]:
"""Return the :attr:`Env` :attr:`reward_range` unless overwritten then the wrapper :attr:`reward_range` is used."""
if self._reward_range is None:
return self.env.reward_range
logger.warn("The `reward_range` is deprecated and will be removed in v1.0")
return self._reward_range
@reward_range.setter
def reward_range(self, value: tuple[SupportsFloat, SupportsFloat]):
self._reward_range = value
@property
def metadata(self) -> dict[str, Any]:
"""Returns the :attr:`Env` :attr:`metadata`."""
if self._metadata is None:
return self.env.metadata
return self._metadata
@metadata.setter
def metadata(self, value: dict[str, Any]):
self._metadata = value
@property
def render_mode(self) -> str | None:
"""Returns the :attr:`Env` :attr:`render_mode`."""
return self.env.render_mode
@property
def np_random(self) -> np.random.Generator:
"""Returns the :attr:`Env` :attr:`np_random` attribute."""
return self.env.np_random
@np_random.setter
def np_random(self, value: np.random.Generator):
self.env.np_random = value
@property
def _np_random(self):
"""This code will never be run due to __getattr__ being called prior this.
It seems that @property overwrites the variable (`_np_random`) meaning that __getattr__ gets called with the missing variable.
"""
raise AttributeError(
"Can't access `_np_random` of a wrapper, use `.unwrapped._np_random` or `.np_random`."
)
def step(
self, action: WrapperActType
) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
"""Uses the :meth:`step` of the :attr:`env` that can be overwritten to change the returned data."""
return self.env.step(action)
def reset(
self, *, seed: int | None = None, options: dict[str, Any] | None = None
) -> tuple[WrapperObsType, dict[str, Any]]:
"""Uses the :meth:`reset` of the :attr:`env` that can be overwritten to change the returned data."""
return self.env.reset(seed=seed, options=options)
def render(self) -> RenderFrame | list[RenderFrame] | None:
"""Uses the :meth:`render` of the :attr:`env` that can be overwritten to change the returned data."""
return self.env.render()
def close(self):
"""Closes the wrapper and :attr:`env`."""
return self.env.close()
def __str__(self):
"""Returns the wrapper name and the :attr:`env` representation string."""
return f"<{type(self).__name__}{self.env}>"
def __repr__(self):
"""Returns the string representation of the wrapper."""
return str(self)
@property
def unwrapped(self) -> Env[ObsType, ActType]:
"""Returns the base environment of the wrapper.
This will be the bare :class:`gymnasium.Env` environment, underneath all layers of wrappers.
"""
return self.env.unwrapped
class ObservationWrapper(Wrapper[WrapperObsType, ActType, ObsType, ActType]):
"""Superclass of wrappers that can modify observations using :meth:`observation` for :meth:`reset` and :meth:`step`.
If you would like to apply a function to only the observation before
passing it to the learning code, you can simply inherit from :class:`ObservationWrapper` and overwrite the method
:meth:`observation` to implement that transformation. The transformation defined in that method must be
reflected by the :attr:`env` observation space. Otherwise, you need to specify the new observation space of the
wrapper by setting :attr:`self.observation_space` in the :meth:`__init__` method of your wrapper.
Among others, Gymnasium provides the observation wrapper :class:`TimeAwareObservation`, which adds information about the
index of the timestep to the observation.
"""
def __init__(self, env: Env[ObsType, ActType]):
"""Constructor for the observation wrapper."""
Wrapper.__init__(self, env)
def reset(
self, *, seed: int | None = None, options: dict[str, Any] | None = None
) -> tuple[WrapperObsType, dict[str, Any]]:
"""Modifies the :attr:`env` after calling :meth:`reset`, returning a modified observation using :meth:`self.observation`."""
obs, info = self.env.reset(seed=seed, options=options)
return self.observation(obs), info
def step(
self, action: ActType
) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
"""Modifies the :attr:`env` after calling :meth:`step` using :meth:`self.observation` on the returned observations."""
observation, reward, terminated, truncated, info = self.env.step(action)
return self.observation(observation), reward, terminated, truncated, info
def observation(self, observation: ObsType) -> WrapperObsType:
"""Returns a modified observation.
Args:
observation: The :attr:`env` observation
Returns:
The modified observation
"""
raise NotImplementedError
class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
"""Superclass of wrappers that can modify the returning reward from a step.
If you would like to apply a function to the reward that is returned by the base environment before
passing it to learning code, you can simply inherit from :class:`RewardWrapper` and overwrite the method
:meth:`reward` to implement that transformation.
This transformation might change the :attr:`reward_range`; to specify the :attr:`reward_range` of your wrapper,
you can simply define :attr:`self.reward_range` in :meth:`__init__`.
"""
def __init__(self, env: Env[ObsType, ActType]):
"""Constructor for the Reward wrapper."""
Wrapper.__init__(self, env)
def step(
self, action: ActType
) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
"""Modifies the :attr:`env` :meth:`step` reward using :meth:`self.reward`."""
observation, reward, terminated, truncated, info = self.env.step(action)
return observation, self.reward(reward), terminated, truncated, info
def reward(self, reward: SupportsFloat) -> SupportsFloat:
"""Returns a modified environment ``reward``.
Args:
reward: The :attr:`env` :meth:`step` reward
Returns:
The modified `reward`
"""
raise NotImplementedError
class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
"""Superclass of wrappers that can modify the action before :meth:`env.step`.
If you would like to apply a function to the action before passing it to the base environment,
you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement
that transformation. The transformation defined in that method must take values in the base environments
action space. However, its domain might differ from the original action space.
In that case, you need to specify the new action space of the wrapper by setting :attr:`self.action_space` in
the :meth:`__init__` method of your wrapper.
Among others, Gymnasium provides the action wrappers :class:`ClipAction` and :class:`RescaleAction` for clipping and rescaling actions.
"""
def __init__(self, env: Env[ObsType, ActType]):
"""Constructor for the action wrapper."""
Wrapper.__init__(self, env)
def step(
self, action: WrapperActType
) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
"""Runs the :attr:`env` :meth:`env.step` using the modified ``action`` from :meth:`self.action`."""
return self.env.step(self.action(action))
def action(self, action: WrapperActType) -> ActType:
"""Returns a modified action before :meth:`env.step` is called.
Args:
action: The original :meth:`step` actions
Returns:
The modified actions
"""
raise NotImplementedError

View File

@ -0,0 +1,387 @@
"""Registers the internal gym envs then loads the env plugins for module using the entry point."""
from typing import Any
from gymnasium.envs.registration import (
load_plugin_envs,
make,
pprint_registry,
register,
registry,
spec,
)
# Classic
# ----------------------------------------
register(
id="CartPole-v0",
entry_point="gymnasium.envs.classic_control.cartpole:CartPoleEnv",
vector_entry_point="gymnasium.envs.classic_control.cartpole:CartPoleVectorEnv",
max_episode_steps=200,
reward_threshold=195.0,
)
register(
id="CartPole-v1",
entry_point="gymnasium.envs.classic_control.cartpole:CartPoleEnv",
vector_entry_point="gymnasium.envs.classic_control.cartpole:CartPoleVectorEnv",
max_episode_steps=500,
reward_threshold=475.0,
)
register(
id="MountainCar-v0",
entry_point="gymnasium.envs.classic_control.mountain_car:MountainCarEnv",
max_episode_steps=200,
reward_threshold=-110.0,
)
register(
id="MountainCarContinuous-v0",
entry_point="gymnasium.envs.classic_control.continuous_mountain_car:Continuous_MountainCarEnv",
max_episode_steps=999,
reward_threshold=90.0,
)
register(
id="Pendulum-v1",
entry_point="gymnasium.envs.classic_control.pendulum:PendulumEnv",
max_episode_steps=200,
)
register(
id="Acrobot-v1",
entry_point="gymnasium.envs.classic_control.acrobot:AcrobotEnv",
reward_threshold=-100.0,
max_episode_steps=500,
)
# Phys2d (jax classic control)
# ----------------------------------------
register(
id="phys2d/CartPole-v0",
entry_point="gymnasium.envs.phys2d.cartpole:CartPoleJaxEnv",
vector_entry_point="gymnasium.envs.phys2d.cartpole:CartPoleJaxVectorEnv",
max_episode_steps=200,
reward_threshold=195.0,
)
register(
id="phys2d/CartPole-v1",
entry_point="gymnasium.envs.phys2d.cartpole:CartPoleJaxEnv",
vector_entry_point="gymnasium.envs.phys2d.cartpole:CartPoleJaxVectorEnv",
max_episode_steps=500,
reward_threshold=475.0,
)
register(
id="phys2d/Pendulum-v0",
entry_point="gymnasium.envs.phys2d.pendulum:PendulumJaxEnv",
vector_entry_point="gymnasium.envs.phys2d.pendulum:PendulumJaxVectorEnv",
max_episode_steps=200,
)
# Box2d
# ----------------------------------------
register(
id="LunarLander-v2",
entry_point="gymnasium.envs.box2d.lunar_lander:LunarLander",
max_episode_steps=1000,
reward_threshold=200,
)
register(
id="LunarLanderContinuous-v2",
entry_point="gymnasium.envs.box2d.lunar_lander:LunarLander",
kwargs={"continuous": True},
max_episode_steps=1000,
reward_threshold=200,
)
register(
id="BipedalWalker-v3",
entry_point="gymnasium.envs.box2d.bipedal_walker:BipedalWalker",
max_episode_steps=1600,
reward_threshold=300,
)
register(
id="BipedalWalkerHardcore-v3",
entry_point="gymnasium.envs.box2d.bipedal_walker:BipedalWalker",
kwargs={"hardcore": True},
max_episode_steps=2000,
reward_threshold=300,
)
register(
id="CarRacing-v2",
entry_point="gymnasium.envs.box2d.car_racing:CarRacing",
max_episode_steps=1000,
reward_threshold=900,
)
# Toy Text
# ----------------------------------------
register(
id="Blackjack-v1",
entry_point="gymnasium.envs.toy_text.blackjack:BlackjackEnv",
kwargs={"sab": True, "natural": False},
)
register(
id="FrozenLake-v1",
entry_point="gymnasium.envs.toy_text.frozen_lake:FrozenLakeEnv",
kwargs={"map_name": "4x4"},
max_episode_steps=100,
reward_threshold=0.70, # optimum = 0.74
)
register(
id="FrozenLake8x8-v1",
entry_point="gymnasium.envs.toy_text.frozen_lake:FrozenLakeEnv",
kwargs={"map_name": "8x8"},
max_episode_steps=200,
reward_threshold=0.85, # optimum = 0.91
)
register(
id="CliffWalking-v0",
entry_point="gymnasium.envs.toy_text.cliffwalking:CliffWalkingEnv",
)
register(
id="Taxi-v3",
entry_point="gymnasium.envs.toy_text.taxi:TaxiEnv",
reward_threshold=8, # optimum = 8.46
max_episode_steps=200,
)
# Tabular
# ----------------------------------------
register(
id="tabular/Blackjack-v0",
entry_point="gymnasium.envs.tabular.blackjack:BlackJackJaxEnv",
kwargs={"sutton_and_barto": True, "natural": False},
)
register(
id="tabular/CliffWalking-v0",
entry_point="gymnasium.envs.tabular.cliffwalking:CliffWalkingJaxEnv",
)
# Mujoco
# ----------------------------------------
# 2D
register(
id="Reacher-v2",
entry_point="gymnasium.envs.mujoco:ReacherEnv",
max_episode_steps=50,
reward_threshold=-3.75,
)
register(
id="Reacher-v4",
entry_point="gymnasium.envs.mujoco.reacher_v4:ReacherEnv",
max_episode_steps=50,
reward_threshold=-3.75,
)
register(
id="Pusher-v2",
entry_point="gymnasium.envs.mujoco:PusherEnv",
max_episode_steps=100,
reward_threshold=0.0,
)
register(
id="Pusher-v4",
entry_point="gymnasium.envs.mujoco.pusher_v4:PusherEnv",
max_episode_steps=100,
reward_threshold=0.0,
)
register(
id="InvertedPendulum-v2",
entry_point="gymnasium.envs.mujoco:InvertedPendulumEnv",
max_episode_steps=1000,
reward_threshold=950.0,
)
register(
id="InvertedPendulum-v4",
entry_point="gymnasium.envs.mujoco.inverted_pendulum_v4:InvertedPendulumEnv",
max_episode_steps=1000,
reward_threshold=950.0,
)
register(
id="InvertedDoublePendulum-v2",
entry_point="gymnasium.envs.mujoco:InvertedDoublePendulumEnv",
max_episode_steps=1000,
reward_threshold=9100.0,
)
register(
id="InvertedDoublePendulum-v4",
entry_point="gymnasium.envs.mujoco.inverted_double_pendulum_v4:InvertedDoublePendulumEnv",
max_episode_steps=1000,
reward_threshold=9100.0,
)
register(
id="HalfCheetah-v2",
entry_point="gymnasium.envs.mujoco:HalfCheetahEnv",
max_episode_steps=1000,
reward_threshold=4800.0,
)
register(
id="HalfCheetah-v3",
entry_point="gymnasium.envs.mujoco.half_cheetah_v3:HalfCheetahEnv",
max_episode_steps=1000,
reward_threshold=4800.0,
)
register(
id="HalfCheetah-v4",
entry_point="gymnasium.envs.mujoco.half_cheetah_v4:HalfCheetahEnv",
max_episode_steps=1000,
reward_threshold=4800.0,
)
register(
id="Hopper-v2",
entry_point="gymnasium.envs.mujoco:HopperEnv",
max_episode_steps=1000,
reward_threshold=3800.0,
)
register(
id="Hopper-v3",
entry_point="gymnasium.envs.mujoco.hopper_v3:HopperEnv",
max_episode_steps=1000,
reward_threshold=3800.0,
)
register(
id="Hopper-v4",
entry_point="gymnasium.envs.mujoco.hopper_v4:HopperEnv",
max_episode_steps=1000,
reward_threshold=3800.0,
)
register(
id="Swimmer-v2",
entry_point="gymnasium.envs.mujoco:SwimmerEnv",
max_episode_steps=1000,
reward_threshold=360.0,
)
register(
id="Swimmer-v3",
entry_point="gymnasium.envs.mujoco.swimmer_v3:SwimmerEnv",
max_episode_steps=1000,
reward_threshold=360.0,
)
register(
id="Swimmer-v4",
entry_point="gymnasium.envs.mujoco.swimmer_v4:SwimmerEnv",
max_episode_steps=1000,
reward_threshold=360.0,
)
register(
id="Walker2d-v2",
max_episode_steps=1000,
entry_point="gymnasium.envs.mujoco:Walker2dEnv",
)
register(
id="Walker2d-v3",
max_episode_steps=1000,
entry_point="gymnasium.envs.mujoco.walker2d_v3:Walker2dEnv",
)
register(
id="Walker2d-v4",
max_episode_steps=1000,
entry_point="gymnasium.envs.mujoco.walker2d_v4:Walker2dEnv",
)
register(
id="Ant-v2",
entry_point="gymnasium.envs.mujoco:AntEnv",
max_episode_steps=1000,
reward_threshold=6000.0,
)
register(
id="Ant-v3",
entry_point="gymnasium.envs.mujoco.ant_v3:AntEnv",
max_episode_steps=1000,
reward_threshold=6000.0,
)
register(
id="Ant-v4",
entry_point="gymnasium.envs.mujoco.ant_v4:AntEnv",
max_episode_steps=1000,
reward_threshold=6000.0,
)
register(
id="Humanoid-v2",
entry_point="gymnasium.envs.mujoco:HumanoidEnv",
max_episode_steps=1000,
)
register(
id="Humanoid-v3",
entry_point="gymnasium.envs.mujoco.humanoid_v3:HumanoidEnv",
max_episode_steps=1000,
)
register(
id="Humanoid-v4",
entry_point="gymnasium.envs.mujoco.humanoid_v4:HumanoidEnv",
max_episode_steps=1000,
)
register(
id="HumanoidStandup-v2",
entry_point="gymnasium.envs.mujoco:HumanoidStandupEnv",
max_episode_steps=1000,
)
register(
id="HumanoidStandup-v4",
entry_point="gymnasium.envs.mujoco.humanoidstandup_v4:HumanoidStandupEnv",
max_episode_steps=1000,
)
# --- For shimmy compatibility
def _raise_shimmy_error(*args: Any, **kwargs: Any):
raise ImportError(
"To use the gym compatibility environments, run `pip install shimmy[gym-v21]` or `pip install shimmy[gym-v26]`"
)
# When installed, shimmy will re-register these environments with the correct entry_point
register(id="GymV21Environment-v0", entry_point=_raise_shimmy_error)
register(id="GymV26Environment-v0", entry_point=_raise_shimmy_error)
# Hook to load plugins from entry points
load_plugin_envs()

View File

@ -0,0 +1,3 @@
from gymnasium.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
from gymnasium.envs.box2d.car_racing import CarRacing
from gymnasium.envs.box2d.lunar_lander import LunarLander, LunarLanderContinuous

View File

@ -0,0 +1,861 @@
__credits__ = ["Andrea PIERRÉ"]
import math
from typing import TYPE_CHECKING, List, Optional
import numpy as np
import gymnasium as gym
from gymnasium import error, spaces
from gymnasium.error import DependencyNotInstalled
from gymnasium.utils import EzPickle
try:
import Box2D
from Box2D.b2 import (
circleShape,
contactListener,
edgeShape,
fixtureDef,
polygonShape,
revoluteJointDef,
)
except ImportError as e:
raise DependencyNotInstalled(
"Box2D is not installed, run `pip install gymnasium[box2d]`"
) from e
if TYPE_CHECKING:
import pygame
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MOTORS_TORQUE = 80
SPEED_HIP = 4
SPEED_KNEE = 6
LIDAR_RANGE = 160 / SCALE
INITIAL_RANDOM = 5
HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)]
LEG_DOWN = -8 / SCALE
LEG_W, LEG_H = 8 / SCALE, 34 / SCALE
VIEWPORT_W = 600
VIEWPORT_H = 400
TERRAIN_STEP = 14 / SCALE
TERRAIN_LENGTH = 200 # in steps
TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4
TERRAIN_GRASS = 10 # low long are grass spots, in steps
TERRAIN_STARTPAD = 20 # in steps
FRICTION = 2.5
HULL_FD = fixtureDef(
shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]),
density=5.0,
friction=0.1,
categoryBits=0x0020,
maskBits=0x001, # collide only with ground
restitution=0.0,
) # 0.99 bouncy
LEG_FD = fixtureDef(
shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001,
)
LOWER_FD = fixtureDef(
shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001,
)
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if (
self.env.hull == contact.fixtureA.body
or self.env.hull == contact.fixtureB.body
):
self.env.game_over = True
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = True
def EndContact(self, contact):
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = False
class BipedalWalker(gym.Env, EzPickle):
"""
## Description
This is a simple 4-joint walker robot environment.
There are two versions:
- Normal, with slightly uneven terrain.
- Hardcore, with ladders, stumps, pitfalls.
To solve the normal version, you need to get 300 points in 1600 time steps.
To solve the hardcore version, you need 300 points in 2000 time steps.
A heuristic is provided for testing. It's also useful to get demonstrations
to learn from. To run the heuristic:
```
python gymnasium/envs/box2d/bipedal_walker.py
```
## Action Space
Actions are motor speed values in the [-1, 1] range for each of the
4 joints at both hips and knees.
## Observation Space
State consists of hull angle speed, angular velocity, horizontal speed,
vertical speed, position of joints and joints angular speed, legs contact
with ground, and 10 lidar rangefinder measurements. There are no coordinates
in the state vector.
## Rewards
Reward is given for moving forward, totaling 300+ points up to the far end.
If the robot falls, it gets -100. Applying motor torque costs a small
amount of points. A more optimal agent will get a better score.
## Starting State
The walker starts standing at the left end of the terrain with the hull
horizontal, and both legs in the same position with a slight knee angle.
## Episode Termination
The episode will terminate if the hull gets in contact with the ground or
if the walker exceeds the right end of the terrain length.
## Arguments
To use to the _hardcore_ environment, you need to specify the
`hardcore=True` argument like below:
```python
import gymnasium as gym
env = gym.make("BipedalWalker-v3", hardcore=True)
```
## Version History
- v3: Returns the closest lidar trace instead of furthest;
faster video recording
- v2: Count energy spent
- v1: Legs now report contact with ground; motors have higher torque and
speed; ground has higher friction; lidar rendered less nervously.
- v0: Initial version
<!-- ## References -->
## Credits
Created by Oleg Klimov
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": FPS,
}
def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False):
EzPickle.__init__(self, render_mode, hardcore)
self.isopen = True
self.world = Box2D.b2World()
self.terrain: List[Box2D.b2Body] = []
self.hull: Optional[Box2D.b2Body] = None
self.prev_shaping = None
self.hardcore = hardcore
self.fd_polygon = fixtureDef(
shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
friction=FRICTION,
)
self.fd_edge = fixtureDef(
shape=edgeShape(vertices=[(0, 0), (1, 1)]),
friction=FRICTION,
categoryBits=0x0001,
)
# we use 5.0 to represent the joints moving at maximum
# 5 x the rated speed due to impulses from ground contact etc.
low = np.array(
[
-math.pi,
-5.0,
-5.0,
-5.0,
-math.pi,
-5.0,
-math.pi,
-5.0,
-0.0,
-math.pi,
-5.0,
-math.pi,
-5.0,
-0.0,
]
+ [-1.0] * 10
).astype(np.float32)
high = np.array(
[
math.pi,
5.0,
5.0,
5.0,
math.pi,
5.0,
math.pi,
5.0,
5.0,
math.pi,
5.0,
math.pi,
5.0,
5.0,
]
+ [1.0] * 10
).astype(np.float32)
self.action_space = spaces.Box(
np.array([-1, -1, -1, -1]).astype(np.float32),
np.array([1, 1, 1, 1]).astype(np.float32),
)
self.observation_space = spaces.Box(low, high)
# state = [
# self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
# 2.0 * self.hull.angularVelocity / FPS,
# 0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range
# 0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
# self.joints[
# 0
# ].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
# self.joints[0].speed / SPEED_HIP,
# self.joints[1].angle + 1.0,
# self.joints[1].speed / SPEED_KNEE,
# 1.0 if self.legs[1].ground_contact else 0.0,
# self.joints[2].angle,
# self.joints[2].speed / SPEED_HIP,
# self.joints[3].angle + 1.0,
# self.joints[3].speed / SPEED_KNEE,
# 1.0 if self.legs[3].ground_contact else 0.0,
# ]
# state += [l.fraction for l in self.lidar]
self.render_mode = render_mode
self.screen: Optional[pygame.Surface] = None
self.clock = None
def _destroy(self):
if not self.terrain:
return
self.world.contactListener = None
for t in self.terrain:
self.world.DestroyBody(t)
self.terrain = []
self.world.DestroyBody(self.hull)
self.hull = None
for leg in self.legs:
self.world.DestroyBody(leg)
self.legs = []
self.joints = []
def _generate_terrain(self, hardcore):
GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
state = GRASS
velocity = 0.0
y = TERRAIN_HEIGHT
counter = TERRAIN_STARTPAD
oneshot = False
self.terrain = []
self.terrain_x = []
self.terrain_y = []
stair_steps, stair_width, stair_height = 0, 0, 0
original_y = 0
for i in range(TERRAIN_LENGTH):
x = i * TERRAIN_STEP
self.terrain_x.append(x)
if state == GRASS and not oneshot:
velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD:
velocity += self.np_random.uniform(-1, 1) / SCALE # 1
y += velocity
elif state == PIT and oneshot:
counter = self.np_random.integers(3, 5)
poly = [
(x, y),
(x + TERRAIN_STEP, y),
(x + TERRAIN_STEP, y - 4 * TERRAIN_STEP),
(x, y - 4 * TERRAIN_STEP),
]
self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
self.terrain.append(t)
self.fd_polygon.shape.vertices = [
(p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
]
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
self.terrain.append(t)
counter += 2
original_y = y
elif state == PIT and not oneshot:
y = original_y
if counter > 1:
y -= 4 * TERRAIN_STEP
elif state == STUMP and oneshot:
counter = self.np_random.integers(1, 3)
poly = [
(x, y),
(x + counter * TERRAIN_STEP, y),
(x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP),
(x, y + counter * TERRAIN_STEP),
]
self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
self.terrain.append(t)
elif state == STAIRS and oneshot:
stair_height = +1 if self.np_random.random() > 0.5 else -1
stair_width = self.np_random.integers(4, 5)
stair_steps = self.np_random.integers(3, 5)
original_y = y
for s in range(stair_steps):
poly = [
(
x + (s * stair_width) * TERRAIN_STEP,
y + (s * stair_height) * TERRAIN_STEP,
),
(
x + ((1 + s) * stair_width) * TERRAIN_STEP,
y + (s * stair_height) * TERRAIN_STEP,
),
(
x + ((1 + s) * stair_width) * TERRAIN_STEP,
y + (-1 + s * stair_height) * TERRAIN_STEP,
),
(
x + (s * stair_width) * TERRAIN_STEP,
y + (-1 + s * stair_height) * TERRAIN_STEP,
),
]
self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
self.terrain.append(t)
counter = stair_steps * stair_width
elif state == STAIRS and not oneshot:
s = stair_steps * stair_width - counter - stair_height
n = s / stair_width
y = original_y + (n * stair_height) * TERRAIN_STEP
oneshot = False
self.terrain_y.append(y)
counter -= 1
if counter == 0:
counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS)
if state == GRASS and hardcore:
state = self.np_random.integers(1, _STATES_)
oneshot = True
else:
state = GRASS
oneshot = True
self.terrain_poly = []
for i in range(TERRAIN_LENGTH - 1):
poly = [
(self.terrain_x[i], self.terrain_y[i]),
(self.terrain_x[i + 1], self.terrain_y[i + 1]),
]
self.fd_edge.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_edge)
color = (76, 255 if i % 2 == 0 else 204, 76)
t.color1 = color
t.color2 = color
self.terrain.append(t)
color = (102, 153, 76)
poly += [(poly[1][0], 0), (poly[0][0], 0)]
self.terrain_poly.append((poly, color))
self.terrain.reverse()
def _generate_clouds(self):
# Sorry for the clouds, couldn't resist
self.cloud_poly = []
for i in range(TERRAIN_LENGTH // 20):
x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
y = VIEWPORT_H / SCALE * 3 / 4
poly = [
(
x
+ 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
y
+ 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
)
for a in range(5)
]
x1 = min(p[0] for p in poly)
x2 = max(p[0] for p in poly)
self.cloud_poly.append((poly, x1, x2))
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
self._destroy()
self.world.contactListener_bug_workaround = ContactDetector(self)
self.world.contactListener = self.world.contactListener_bug_workaround
self.game_over = False
self.prev_shaping = None
self.scroll = 0.0
self.lidar_render = 0
self._generate_terrain(self.hardcore)
self._generate_clouds()
init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
init_y = TERRAIN_HEIGHT + 2 * LEG_H
self.hull = self.world.CreateDynamicBody(
position=(init_x, init_y), fixtures=HULL_FD
)
self.hull.color1 = (127, 51, 229)
self.hull.color2 = (76, 76, 127)
self.hull.ApplyForceToCenter(
(self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
)
self.legs: List[Box2D.b2Body] = []
self.joints: List[Box2D.b2RevoluteJoint] = []
for i in [-1, +1]:
leg = self.world.CreateDynamicBody(
position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
angle=(i * 0.05),
fixtures=LEG_FD,
)
leg.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
leg.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=leg,
localAnchorA=(0, LEG_DOWN),
localAnchorB=(0, LEG_H / 2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed=i,
lowerAngle=-0.8,
upperAngle=1.1,
)
self.legs.append(leg)
self.joints.append(self.world.CreateJoint(rjd))
lower = self.world.CreateDynamicBody(
position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
angle=(i * 0.05),
fixtures=LOWER_FD,
)
lower.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
lower.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
rjd = revoluteJointDef(
bodyA=leg,
bodyB=lower,
localAnchorA=(0, -LEG_H / 2),
localAnchorB=(0, LEG_H / 2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed=1,
lowerAngle=-1.6,
upperAngle=-0.1,
)
lower.ground_contact = False
self.legs.append(lower)
self.joints.append(self.world.CreateJoint(rjd))
self.drawlist = self.terrain + self.legs + [self.hull]
class LidarCallback(Box2D.b2.rayCastCallback):
def ReportFixture(self, fixture, point, normal, fraction):
if (fixture.filterData.categoryBits & 1) == 0:
return -1
self.p2 = point
self.fraction = fraction
return fraction
self.lidar = [LidarCallback() for _ in range(10)]
if self.render_mode == "human":
self.render()
return self.step(np.array([0, 0, 0, 0]))[0], {}
def step(self, action: np.ndarray):
assert self.hull is not None
# self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
control_speed = False # Should be easier as well
if control_speed:
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
else:
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
self.joints[0].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
)
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
self.joints[1].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
)
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
self.joints[2].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
)
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
self.joints[3].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
pos = self.hull.position
vel = self.hull.linearVelocity
for i in range(10):
self.lidar[i].fraction = 1.0
self.lidar[i].p1 = pos
self.lidar[i].p2 = (
pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE,
pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE,
)
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
state = [
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
2.0 * self.hull.angularVelocity / FPS,
0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range
0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
self.joints[0].angle,
# This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
self.joints[0].speed / SPEED_HIP,
self.joints[1].angle + 1.0,
self.joints[1].speed / SPEED_KNEE,
1.0 if self.legs[1].ground_contact else 0.0,
self.joints[2].angle,
self.joints[2].speed / SPEED_HIP,
self.joints[3].angle + 1.0,
self.joints[3].speed / SPEED_KNEE,
1.0 if self.legs[3].ground_contact else 0.0,
]
state += [l.fraction for l in self.lidar]
assert len(state) == 24
self.scroll = pos.x - VIEWPORT_W / SCALE / 5
shaping = (
130 * pos[0] / SCALE
) # moving forward is a way to receive reward (normalized to get 300 on completion)
shaping -= 5.0 * abs(
state[0]
) # keep head straight, other than that and falling, any behavior is unpunished
reward = 0
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
for a in action:
reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
# normalized to about -50.0 using heuristic, more optimal agent should spend less
terminated = False
if self.game_over or pos[0] < 0:
reward = -100
terminated = True
if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
terminated = True
if self.render_mode == "human":
self.render()
return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[box2d]`"
) from e
if self.screen is None and self.render_mode == "human":
pygame.init()
pygame.display.init()
self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
if self.clock is None:
self.clock = pygame.time.Clock()
self.surf = pygame.Surface(
(VIEWPORT_W + max(0.0, self.scroll) * SCALE, VIEWPORT_H)
)
pygame.transform.scale(self.surf, (SCALE, SCALE))
pygame.draw.polygon(
self.surf,
color=(215, 215, 255),
points=[
(self.scroll * SCALE, 0),
(self.scroll * SCALE + VIEWPORT_W, 0),
(self.scroll * SCALE + VIEWPORT_W, VIEWPORT_H),
(self.scroll * SCALE, VIEWPORT_H),
],
)
for poly, x1, x2 in self.cloud_poly:
if x2 < self.scroll / 2:
continue
if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
continue
pygame.draw.polygon(
self.surf,
color=(255, 255, 255),
points=[
(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly
],
)
gfxdraw.aapolygon(
self.surf,
[(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly],
(255, 255, 255),
)
for poly, color in self.terrain_poly:
if poly[1][0] < self.scroll:
continue
if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
continue
scaled_poly = []
for coord in poly:
scaled_poly.append([coord[0] * SCALE, coord[1] * SCALE])
pygame.draw.polygon(self.surf, color=color, points=scaled_poly)
gfxdraw.aapolygon(self.surf, scaled_poly, color)
self.lidar_render = (self.lidar_render + 1) % 100
i = self.lidar_render
if i < 2 * len(self.lidar):
single_lidar = (
self.lidar[i]
if i < len(self.lidar)
else self.lidar[len(self.lidar) - i - 1]
)
if hasattr(single_lidar, "p1") and hasattr(single_lidar, "p2"):
pygame.draw.line(
self.surf,
color=(255, 0, 0),
start_pos=(single_lidar.p1[0] * SCALE, single_lidar.p1[1] * SCALE),
end_pos=(single_lidar.p2[0] * SCALE, single_lidar.p2[1] * SCALE),
width=1,
)
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
pygame.draw.circle(
self.surf,
color=obj.color1,
center=trans * f.shape.pos * SCALE,
radius=f.shape.radius * SCALE,
)
pygame.draw.circle(
self.surf,
color=obj.color2,
center=trans * f.shape.pos * SCALE,
radius=f.shape.radius * SCALE,
)
else:
path = [trans * v * SCALE for v in f.shape.vertices]
if len(path) > 2:
pygame.draw.polygon(self.surf, color=obj.color1, points=path)
gfxdraw.aapolygon(self.surf, path, obj.color1)
path.append(path[0])
pygame.draw.polygon(
self.surf, color=obj.color2, points=path, width=1
)
gfxdraw.aapolygon(self.surf, path, obj.color2)
else:
pygame.draw.aaline(
self.surf,
start_pos=path[0],
end_pos=path[1],
color=obj.color1,
)
flagy1 = TERRAIN_HEIGHT * SCALE
flagy2 = flagy1 + 50
x = TERRAIN_STEP * 3 * SCALE
pygame.draw.aaline(
self.surf, color=(0, 0, 0), start_pos=(x, flagy1), end_pos=(x, flagy2)
)
f = [
(x, flagy2),
(x, flagy2 - 10),
(x + 25, flagy2 - 5),
]
pygame.draw.polygon(self.surf, color=(230, 51, 0), points=f)
pygame.draw.lines(
self.surf, color=(0, 0, 0), points=f + [f[0]], width=1, closed=False
)
self.surf = pygame.transform.flip(self.surf, False, True)
if self.render_mode == "human":
assert self.screen is not None
self.screen.blit(self.surf, (-self.scroll * SCALE, 0))
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2)
)[:, -VIEWPORT_W:]
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False
class BipedalWalkerHardcore:
def __init__(self):
raise error.Error(
"Error initializing BipedalWalkerHardcore Environment.\n"
"Currently, we do not support initializing this mode of environment by calling the class directly.\n"
"To use this environment, instead create it by specifying the hardcore keyword in gym.make, i.e.\n"
'gym.make("BipedalWalker-v3", hardcore=True)'
)
if __name__ == "__main__":
# Heurisic: suboptimal, have no notion of balance.
env = BipedalWalker()
env.reset()
steps = 0
total_reward = 0
a = np.array([0.0, 0.0, 0.0, 0.0])
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3
SPEED = 0.29 # Will fall forward on higher speed
state = STAY_ON_ONE_LEG
moving_leg = 0
supporting_leg = 1 - moving_leg
SUPPORT_KNEE_ANGLE = +0.1
supporting_knee_angle = SUPPORT_KNEE_ANGLE
while True:
s, r, terminated, truncated, info = env.step(a)
total_reward += r
if steps % 20 == 0 or terminated or truncated:
print("\naction " + str([f"{x:+0.2f}" for x in a]))
print(f"step {steps} total_reward {total_reward:+0.2f}")
print("hull " + str([f"{x:+0.2f}" for x in s[0:4]]))
print("leg0 " + str([f"{x:+0.2f}" for x in s[4:9]]))
print("leg1 " + str([f"{x:+0.2f}" for x in s[9:14]]))
steps += 1
contact0 = s[8]
contact1 = s[13]
moving_s_base = 4 + 5 * moving_leg
supporting_s_base = 4 + 5 * supporting_leg
hip_targ = [None, None] # -0.8 .. +1.1
knee_targ = [None, None] # -0.6 .. +0.9
hip_todo = [0.0, 0.0]
knee_todo = [0.0, 0.0]
if state == STAY_ON_ONE_LEG:
hip_targ[moving_leg] = 1.1
knee_targ[moving_leg] = -0.6
supporting_knee_angle += 0.03
if s[2] > SPEED:
supporting_knee_angle += 0.03
supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE)
knee_targ[supporting_leg] = supporting_knee_angle
if s[supporting_s_base + 0] < 0.10: # supporting leg is behind
state = PUT_OTHER_DOWN
if state == PUT_OTHER_DOWN:
hip_targ[moving_leg] = +0.1
knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
knee_targ[supporting_leg] = supporting_knee_angle
if s[moving_s_base + 4]:
state = PUSH_OFF
supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE)
if state == PUSH_OFF:
knee_targ[moving_leg] = supporting_knee_angle
knee_targ[supporting_leg] = +1.0
if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED:
state = STAY_ON_ONE_LEG
moving_leg = 1 - moving_leg
supporting_leg = 1 - moving_leg
if hip_targ[0]:
hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5]
if hip_targ[1]:
hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10]
if knee_targ[0]:
knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7]
if knee_targ[1]:
knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12]
hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1] # PID to keep head strait
hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1]
knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations
knee_todo[1] -= 15.0 * s[3]
a[0] = hip_todo[0]
a[1] = knee_todo[0]
a[2] = hip_todo[1]
a[3] = knee_todo[1]
a = np.clip(0.5 * a, -1.0, 1.0)
if terminated or truncated:
break

View File

@ -0,0 +1,356 @@
"""
Top-down car dynamics simulation.
Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
This simulation is a bit more detailed, with wheels rotation.
Created by Oleg Klimov
"""
import math
import Box2D
import numpy as np
from gymnasium.error import DependencyNotInstalled
try:
from Box2D.b2 import fixtureDef, polygonShape, revoluteJointDef
except ImportError as e:
raise DependencyNotInstalled(
"Box2D is not installed, run `pip install gymnasium[box2d]`"
) from e
SIZE = 0.02
ENGINE_POWER = 100000000 * SIZE * SIZE
WHEEL_MOMENT_OF_INERTIA = 4000 * SIZE * SIZE
FRICTION_LIMIT = (
1000000 * SIZE * SIZE
) # friction ~= mass ~= size^2 (calculated implicitly using density)
WHEEL_R = 27
WHEEL_W = 14
WHEELPOS = [(-55, +80), (+55, +80), (-55, -82), (+55, -82)]
HULL_POLY1 = [(-60, +130), (+60, +130), (+60, +110), (-60, +110)]
HULL_POLY2 = [(-15, +120), (+15, +120), (+20, +20), (-20, 20)]
HULL_POLY3 = [
(+25, +20),
(+50, -10),
(+50, -40),
(+20, -90),
(-20, -90),
(-50, -40),
(-50, -10),
(-25, +20),
]
HULL_POLY4 = [(-50, -120), (+50, -120), (+50, -90), (-50, -90)]
WHEEL_COLOR = (0, 0, 0)
WHEEL_WHITE = (77, 77, 77)
MUD_COLOR = (102, 102, 0)
class Car:
def __init__(self, world, init_angle, init_x, init_y):
self.world: Box2D.b2World = world
self.hull: Box2D.b2Body = self.world.CreateDynamicBody(
position=(init_x, init_y),
angle=init_angle,
fixtures=[
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY1]
),
density=1.0,
),
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY2]
),
density=1.0,
),
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY3]
),
density=1.0,
),
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY4]
),
density=1.0,
),
],
)
self.hull.color = (0.8, 0.0, 0.0)
self.wheels = []
self.fuel_spent = 0.0
WHEEL_POLY = [
(-WHEEL_W, +WHEEL_R),
(+WHEEL_W, +WHEEL_R),
(+WHEEL_W, -WHEEL_R),
(-WHEEL_W, -WHEEL_R),
]
for wx, wy in WHEELPOS:
front_k = 1.0 if wy > 0 else 1.0
w = self.world.CreateDynamicBody(
position=(init_x + wx * SIZE, init_y + wy * SIZE),
angle=init_angle,
fixtures=fixtureDef(
shape=polygonShape(
vertices=[
(x * front_k * SIZE, y * front_k * SIZE)
for x, y in WHEEL_POLY
]
),
density=0.1,
categoryBits=0x0020,
maskBits=0x001,
restitution=0.0,
),
)
w.wheel_rad = front_k * WHEEL_R * SIZE
w.color = WHEEL_COLOR
w.gas = 0.0
w.brake = 0.0
w.steer = 0.0
w.phase = 0.0 # wheel angle
w.omega = 0.0 # angular velocity
w.skid_start = None
w.skid_particle = None
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=w,
localAnchorA=(wx * SIZE, wy * SIZE),
localAnchorB=(0, 0),
enableMotor=True,
enableLimit=True,
maxMotorTorque=180 * 900 * SIZE * SIZE,
motorSpeed=0,
lowerAngle=-0.4,
upperAngle=+0.4,
)
w.joint = self.world.CreateJoint(rjd)
w.tiles = set()
w.userData = w
self.wheels.append(w)
self.drawlist = self.wheels + [self.hull]
self.particles = []
def gas(self, gas):
"""control: rear wheel drive
Args:
gas (float): How much gas gets applied. Gets clipped between 0 and 1.
"""
gas = np.clip(gas, 0, 1)
for w in self.wheels[2:4]:
diff = gas - w.gas
if diff > 0.1:
diff = 0.1 # gradually increase, but stop immediately
w.gas += diff
def brake(self, b):
"""control: brake
Args:
b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation
"""
for w in self.wheels:
w.brake = b
def steer(self, s):
"""control: steer
Args:
s (-1..1): target position, it takes time to rotate steering wheel from side-to-side
"""
self.wheels[0].steer = s
self.wheels[1].steer = s
def step(self, dt):
for w in self.wheels:
# Steer each wheel
dir = np.sign(w.steer - w.joint.angle)
val = abs(w.steer - w.joint.angle)
w.joint.motorSpeed = dir * min(50.0 * val, 3.0)
# Position => friction_limit
grass = True
friction_limit = FRICTION_LIMIT * 0.6 # Grass friction if no tile
for tile in w.tiles:
friction_limit = max(
friction_limit, FRICTION_LIMIT * tile.road_friction
)
grass = False
# Force
forw = w.GetWorldVector((0, 1))
side = w.GetWorldVector((1, 0))
v = w.linearVelocity
vf = forw[0] * v[0] + forw[1] * v[1] # forward speed
vs = side[0] * v[0] + side[1] * v[1] # side speed
# WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
# WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
# domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
# add small coef not to divide by zero
w.omega += (
dt
* ENGINE_POWER
* w.gas
/ WHEEL_MOMENT_OF_INERTIA
/ (abs(w.omega) + 5.0)
)
self.fuel_spent += dt * ENGINE_POWER * w.gas
if w.brake >= 0.9:
w.omega = 0
elif w.brake > 0:
BRAKE_FORCE = 15 # radians per second
dir = -np.sign(w.omega)
val = BRAKE_FORCE * w.brake
if abs(val) > abs(w.omega):
val = abs(w.omega) # low speed => same as = 0
w.omega += dir * val
w.phase += w.omega * dt
vr = w.omega * w.wheel_rad # rotating wheel speed
f_force = -vf + vr # force direction is direction of speed difference
p_force = -vs
# Physically correct is to always apply friction_limit until speed is equal.
# But dt is finite, that will lead to oscillations if difference is already near zero.
# Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
f_force *= 205000 * SIZE * SIZE
p_force *= 205000 * SIZE * SIZE
force = np.sqrt(np.square(f_force) + np.square(p_force))
# Skid trace
if abs(force) > 2.0 * friction_limit:
if (
w.skid_particle
and w.skid_particle.grass == grass
and len(w.skid_particle.poly) < 30
):
w.skid_particle.poly.append((w.position[0], w.position[1]))
elif w.skid_start is None:
w.skid_start = w.position
else:
w.skid_particle = self._create_particle(
w.skid_start, w.position, grass
)
w.skid_start = None
else:
w.skid_start = None
w.skid_particle = None
if abs(force) > friction_limit:
f_force /= force
p_force /= force
force = friction_limit # Correct physics here
f_force *= force
p_force *= force
w.omega -= dt * f_force * w.wheel_rad / WHEEL_MOMENT_OF_INERTIA
w.ApplyForceToCenter(
(
p_force * side[0] + f_force * forw[0],
p_force * side[1] + f_force * forw[1],
),
True,
)
def draw(self, surface, zoom, translation, angle, draw_particles=True):
import pygame.draw
if draw_particles:
for p in self.particles:
poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in p.poly]
poly = [
(
coords[0] * zoom + translation[0],
coords[1] * zoom + translation[1],
)
for coords in poly
]
pygame.draw.lines(
surface, color=p.color, points=poly, width=2, closed=False
)
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
path = [trans * v for v in f.shape.vertices]
path = [(coords[0], coords[1]) for coords in path]
path = [pygame.math.Vector2(c).rotate_rad(angle) for c in path]
path = [
(
coords[0] * zoom + translation[0],
coords[1] * zoom + translation[1],
)
for coords in path
]
color = [int(c * 255) for c in obj.color]
pygame.draw.polygon(surface, color=color, points=path)
if "phase" not in obj.__dict__:
continue
a1 = obj.phase
a2 = obj.phase + 1.2 # radians
s1 = math.sin(a1)
s2 = math.sin(a2)
c1 = math.cos(a1)
c2 = math.cos(a2)
if s1 > 0 and s2 > 0:
continue
if s1 > 0:
c1 = np.sign(c1)
if s2 > 0:
c2 = np.sign(c2)
white_poly = [
(-WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
(+WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
(+WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
(-WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
]
white_poly = [trans * v for v in white_poly]
white_poly = [(coords[0], coords[1]) for coords in white_poly]
white_poly = [
pygame.math.Vector2(c).rotate_rad(angle) for c in white_poly
]
white_poly = [
(
coords[0] * zoom + translation[0],
coords[1] * zoom + translation[1],
)
for coords in white_poly
]
pygame.draw.polygon(surface, color=WHEEL_WHITE, points=white_poly)
def _create_particle(self, point1, point2, grass):
class Particle:
pass
p = Particle()
p.color = WHEEL_COLOR if not grass else MUD_COLOR
p.ttl = 1
p.poly = [(point1[0], point1[1]), (point2[0], point2[1])]
p.grass = grass
self.particles.append(p)
while len(self.particles) > 30:
self.particles.pop(0)
return p
def destroy(self):
self.world.DestroyBody(self.hull)
self.hull = None
for w in self.wheels:
self.world.DestroyBody(w)
self.wheels = []

View File

@ -0,0 +1,841 @@
__credits__ = ["Andrea PIERRÉ"]
import math
from typing import Optional, Union
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.box2d.car_dynamics import Car
from gymnasium.error import DependencyNotInstalled, InvalidAction
from gymnasium.utils import EzPickle
try:
import Box2D
from Box2D.b2 import contactListener, fixtureDef, polygonShape
except ImportError as e:
raise DependencyNotInstalled(
"Box2D is not installed, run `pip install gymnasium[box2d]`"
) from e
try:
# As pygame is necessary for using the environment (reset and step) even without a render mode
# therefore, pygame is a necessary import for the environment.
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[box2d]`"
) from e
STATE_W = 96 # less than Atari 160x192
STATE_H = 96
VIDEO_W = 600
VIDEO_H = 400
WINDOW_W = 1000
WINDOW_H = 800
SCALE = 6.0 # Track scale
TRACK_RAD = 900 / SCALE # Track is heavily morphed circle with this radius
PLAYFIELD = 2000 / SCALE # Game over boundary
FPS = 50 # Frames per second
ZOOM = 2.7 # Camera zoom
ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom)
TRACK_DETAIL_STEP = 21 / SCALE
TRACK_TURN_RATE = 0.31
TRACK_WIDTH = 40 / SCALE
BORDER = 8 / SCALE
BORDER_MIN_COUNT = 4
GRASS_DIM = PLAYFIELD / 20.0
MAX_SHAPE_DIM = (
max(GRASS_DIM, TRACK_WIDTH, TRACK_DETAIL_STEP) * math.sqrt(2) * ZOOM * SCALE
)
class FrictionDetector(contactListener):
def __init__(self, env, lap_complete_percent):
contactListener.__init__(self)
self.env = env
self.lap_complete_percent = lap_complete_percent
def BeginContact(self, contact):
self._contact(contact, True)
def EndContact(self, contact):
self._contact(contact, False)
def _contact(self, contact, begin):
tile = None
obj = None
u1 = contact.fixtureA.body.userData
u2 = contact.fixtureB.body.userData
if u1 and "road_friction" in u1.__dict__:
tile = u1
obj = u2
if u2 and "road_friction" in u2.__dict__:
tile = u2
obj = u1
if not tile:
return
# inherit tile color from env
tile.color[:] = self.env.road_color
if not obj or "tiles" not in obj.__dict__:
return
if begin:
obj.tiles.add(tile)
if not tile.road_visited:
tile.road_visited = True
self.env.reward += 1000.0 / len(self.env.track)
self.env.tile_visited_count += 1
# Lap is considered completed if enough % of the track was covered
if (
tile.idx == 0
and self.env.tile_visited_count / len(self.env.track)
> self.lap_complete_percent
):
self.env.new_lap = True
else:
obj.tiles.remove(tile)
class CarRacing(gym.Env, EzPickle):
"""
## Description
The easiest control task to learn from pixels - a top-down
racing environment. The generated track is random every episode.
Some indicators are shown at the bottom of the window along with the
state RGB buffer. From left to right: true speed, four ABS sensors,
steering wheel position, and gyroscope.
To play yourself (it's rather fast for humans), type:
```
python gymnasium/envs/box2d/car_racing.py
```
Remember: it's a powerful rear-wheel drive car - don't press the accelerator
and turn at the same time.
## Action Space
If continuous there are 3 actions :
- 0: steering, -1 is full left, +1 is full right
- 1: gas
- 2: breaking
If discrete there are 5 actions:
- 0: do nothing
- 1: steer left
- 2: steer right
- 3: gas
- 4: brake
## Observation Space
A top-down 96x96 RGB image of the car and race track.
## Rewards
The reward is -0.1 every frame and +1000/N for every track tile visited,
where N is the total number of tiles visited in the track. For example,
if you have finished in 732 frames, your reward is
1000 - 0.1*732 = 926.8 points.
## Starting State
The car starts at rest in the center of the road.
## Episode Termination
The episode finishes when all the tiles are visited. The car can also go
outside the playfield - that is, far off the track, in which case it will
receive -100 reward and die.
## Arguments
`lap_complete_percent` dictates the percentage of tiles that must be visited by
the agent before a lap is considered complete.
Passing `domain_randomize=True` enables the domain randomized variant of the environment.
In this scenario, the background and track colours are different on every reset.
Passing `continuous=False` converts the environment to use discrete action space.
The discrete action space has 5 actions: [do nothing, left, right, gas, brake].
## Reset Arguments
Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment.
`domain_randomize` must be `True` on init for this argument to work.
Example usage:
```python
import gymnasium as gym
env = gym.make("CarRacing-v1", domain_randomize=True)
# normal reset, this changes the colour scheme by default
env.reset()
# reset with colour scheme change
env.reset(options={"randomize": True})
# reset with no colour scheme change
env.reset(options={"randomize": False})
```
## Version History
- v1: Change track completion logic and add domain randomization (0.24.0)
- v0: Original version
## References
- Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.
## Credits
Created by Oleg Klimov
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"state_pixels",
],
"render_fps": FPS,
}
def __init__(
self,
render_mode: Optional[str] = None,
verbose: bool = False,
lap_complete_percent: float = 0.95,
domain_randomize: bool = False,
continuous: bool = True,
):
EzPickle.__init__(
self,
render_mode,
verbose,
lap_complete_percent,
domain_randomize,
continuous,
)
self.continuous = continuous
self.domain_randomize = domain_randomize
self.lap_complete_percent = lap_complete_percent
self._init_colors()
self.contactListener_keepref = FrictionDetector(self, self.lap_complete_percent)
self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
self.screen: Optional[pygame.Surface] = None
self.surf = None
self.clock = None
self.isopen = True
self.invisible_state_window = None
self.invisible_video_window = None
self.road = None
self.car: Optional[Car] = None
self.reward = 0.0
self.prev_reward = 0.0
self.verbose = verbose
self.new_lap = False
self.fd_tile = fixtureDef(
shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
)
# This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
# or normalised however this is not possible here so ignore
if self.continuous:
self.action_space = spaces.Box(
np.array([-1, 0, 0]).astype(np.float32),
np.array([+1, +1, +1]).astype(np.float32),
) # steer, gas, brake
else:
self.action_space = spaces.Discrete(5)
# do nothing, left, right, gas, brake
self.observation_space = spaces.Box(
low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
)
self.render_mode = render_mode
def _destroy(self):
if not self.road:
return
for t in self.road:
self.world.DestroyBody(t)
self.road = []
assert self.car is not None
self.car.destroy()
def _init_colors(self):
if self.domain_randomize:
# domain randomize the bg and grass colour
self.road_color = self.np_random.uniform(0, 210, size=3)
self.bg_color = self.np_random.uniform(0, 210, size=3)
self.grass_color = np.copy(self.bg_color)
idx = self.np_random.integers(3)
self.grass_color[idx] += 20
else:
# default colours
self.road_color = np.array([102, 102, 102])
self.bg_color = np.array([102, 204, 102])
self.grass_color = np.array([102, 230, 102])
def _reinit_colors(self, randomize):
assert (
self.domain_randomize
), "domain_randomize must be True to use this function."
if randomize:
# domain randomize the bg and grass colour
self.road_color = self.np_random.uniform(0, 210, size=3)
self.bg_color = self.np_random.uniform(0, 210, size=3)
self.grass_color = np.copy(self.bg_color)
idx = self.np_random.integers(3)
self.grass_color[idx] += 20
def _create_track(self):
CHECKPOINTS = 12
# Create checkpoints
checkpoints = []
for c in range(CHECKPOINTS):
noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
alpha = 2 * math.pi * c / CHECKPOINTS + noise
rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
if c == 0:
alpha = 0
rad = 1.5 * TRACK_RAD
if c == CHECKPOINTS - 1:
alpha = 2 * math.pi * c / CHECKPOINTS
self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
rad = 1.5 * TRACK_RAD
checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
self.road = []
# Go from one checkpoint to another to create track
x, y, beta = 1.5 * TRACK_RAD, 0, 0
dest_i = 0
laps = 0
track = []
no_freeze = 2500
visited_other_side = False
while True:
alpha = math.atan2(y, x)
if visited_other_side and alpha > 0:
laps += 1
visited_other_side = False
if alpha < 0:
visited_other_side = True
alpha += 2 * math.pi
while True: # Find destination from checkpoints
failed = True
while True:
dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
if alpha <= dest_alpha:
failed = False
break
dest_i += 1
if dest_i % len(checkpoints) == 0:
break
if not failed:
break
alpha -= 2 * math.pi
continue
r1x = math.cos(beta)
r1y = math.sin(beta)
p1x = -r1y
p1y = r1x
dest_dx = dest_x - x # vector towards destination
dest_dy = dest_y - y
# destination vector projected on rad:
proj = r1x * dest_dx + r1y * dest_dy
while beta - alpha > 1.5 * math.pi:
beta -= 2 * math.pi
while beta - alpha < -1.5 * math.pi:
beta += 2 * math.pi
prev_beta = beta
proj *= SCALE
if proj > 0.3:
beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
if proj < -0.3:
beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
x += p1x * TRACK_DETAIL_STEP
y += p1y * TRACK_DETAIL_STEP
track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
if laps > 4:
break
no_freeze -= 1
if no_freeze == 0:
break
# Find closed loop range i1..i2, first loop should be ignored, second is OK
i1, i2 = -1, -1
i = len(track)
while True:
i -= 1
if i == 0:
return False # Failed
pass_through_start = (
track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
)
if pass_through_start and i2 == -1:
i2 = i
elif pass_through_start and i1 == -1:
i1 = i
break
if self.verbose:
print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
assert i1 != -1
assert i2 != -1
track = track[i1 : i2 - 1]
first_beta = track[0][1]
first_perp_x = math.cos(first_beta)
first_perp_y = math.sin(first_beta)
# Length of perpendicular jump to put together head and tail
well_glued_together = np.sqrt(
np.square(first_perp_x * (track[0][2] - track[-1][2]))
+ np.square(first_perp_y * (track[0][3] - track[-1][3]))
)
if well_glued_together > TRACK_DETAIL_STEP:
return False
# Red-white border on hard turns
border = [False] * len(track)
for i in range(len(track)):
good = True
oneside = 0
for neg in range(BORDER_MIN_COUNT):
beta1 = track[i - neg - 0][1]
beta2 = track[i - neg - 1][1]
good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
oneside += np.sign(beta1 - beta2)
good &= abs(oneside) == BORDER_MIN_COUNT
border[i] = good
for i in range(len(track)):
for neg in range(BORDER_MIN_COUNT):
border[i - neg] |= border[i]
# Create tiles
for i in range(len(track)):
alpha1, beta1, x1, y1 = track[i]
alpha2, beta2, x2, y2 = track[i - 1]
road1_l = (
x1 - TRACK_WIDTH * math.cos(beta1),
y1 - TRACK_WIDTH * math.sin(beta1),
)
road1_r = (
x1 + TRACK_WIDTH * math.cos(beta1),
y1 + TRACK_WIDTH * math.sin(beta1),
)
road2_l = (
x2 - TRACK_WIDTH * math.cos(beta2),
y2 - TRACK_WIDTH * math.sin(beta2),
)
road2_r = (
x2 + TRACK_WIDTH * math.cos(beta2),
y2 + TRACK_WIDTH * math.sin(beta2),
)
vertices = [road1_l, road1_r, road2_r, road2_l]
self.fd_tile.shape.vertices = vertices
t = self.world.CreateStaticBody(fixtures=self.fd_tile)
t.userData = t
c = 0.01 * (i % 3) * 255
t.color = self.road_color + c
t.road_visited = False
t.road_friction = 1.0
t.idx = i
t.fixtures[0].sensor = True
self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
self.road.append(t)
if border[i]:
side = np.sign(beta2 - beta1)
b1_l = (
x1 + side * TRACK_WIDTH * math.cos(beta1),
y1 + side * TRACK_WIDTH * math.sin(beta1),
)
b1_r = (
x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
)
b2_l = (
x2 + side * TRACK_WIDTH * math.cos(beta2),
y2 + side * TRACK_WIDTH * math.sin(beta2),
)
b2_r = (
x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
)
self.road_poly.append(
(
[b1_l, b1_r, b2_r, b2_l],
(255, 255, 255) if i % 2 == 0 else (255, 0, 0),
)
)
self.track = track
return True
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
self._destroy()
self.world.contactListener_bug_workaround = FrictionDetector(
self, self.lap_complete_percent
)
self.world.contactListener = self.world.contactListener_bug_workaround
self.reward = 0.0
self.prev_reward = 0.0
self.tile_visited_count = 0
self.t = 0.0
self.new_lap = False
self.road_poly = []
if self.domain_randomize:
randomize = True
if isinstance(options, dict):
if "randomize" in options:
randomize = options["randomize"]
self._reinit_colors(randomize)
while True:
success = self._create_track()
if success:
break
if self.verbose:
print(
"retry to generate track (normal if there are not many"
"instances of this message)"
)
self.car = Car(self.world, *self.track[0][1:4])
if self.render_mode == "human":
self.render()
return self.step(None)[0], {}
def step(self, action: Union[np.ndarray, int]):
assert self.car is not None
if action is not None:
if self.continuous:
self.car.steer(-action[0])
self.car.gas(action[1])
self.car.brake(action[2])
else:
if not self.action_space.contains(action):
raise InvalidAction(
f"you passed the invalid action `{action}`. "
f"The supported action_space is `{self.action_space}`"
)
self.car.steer(-0.6 * (action == 1) + 0.6 * (action == 2))
self.car.gas(0.2 * (action == 3))
self.car.brake(0.8 * (action == 4))
self.car.step(1.0 / FPS)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
self.t += 1.0 / FPS
self.state = self._render("state_pixels")
step_reward = 0
terminated = False
truncated = False
if action is not None: # First step without action, called from reset()
self.reward -= 0.1
# We actually don't want to count fuel spent, we want car to be faster.
# self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
self.car.fuel_spent = 0.0
step_reward = self.reward - self.prev_reward
self.prev_reward = self.reward
if self.tile_visited_count == len(self.track) or self.new_lap:
# Truncation due to finishing lap
# This should not be treated as a failure
# but like a timeout
truncated = True
x, y = self.car.hull.position
if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
terminated = True
step_reward = -100
if self.render_mode == "human":
self.render()
return self.state, step_reward, terminated, truncated, {}
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
else:
return self._render(self.render_mode)
def _render(self, mode: str):
assert mode in self.metadata["render_modes"]
pygame.font.init()
if self.screen is None and mode == "human":
pygame.init()
pygame.display.init()
self.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H))
if self.clock is None:
self.clock = pygame.time.Clock()
if "t" not in self.__dict__:
return # reset() not called yet
self.surf = pygame.Surface((WINDOW_W, WINDOW_H))
assert self.car is not None
# computing transformations
angle = -self.car.hull.angle
# Animating first second zoom.
zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
scroll_x = -(self.car.hull.position[0]) * zoom
scroll_y = -(self.car.hull.position[1]) * zoom
trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle)
trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1])
self._render_road(zoom, trans, angle)
self.car.draw(
self.surf,
zoom,
trans,
angle,
mode not in ["state_pixels_list", "state_pixels"],
)
self.surf = pygame.transform.flip(self.surf, False, True)
# showing stats
self._render_indicators(WINDOW_W, WINDOW_H)
font = pygame.font.Font(pygame.font.get_default_font(), 42)
text = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0))
text_rect = text.get_rect()
text_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0)
self.surf.blit(text, text_rect)
if mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
assert self.screen is not None
self.screen.fill(0)
self.screen.blit(self.surf, (0, 0))
pygame.display.flip()
elif mode == "rgb_array":
return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H))
elif mode == "state_pixels":
return self._create_image_array(self.surf, (STATE_W, STATE_H))
else:
return self.isopen
def _render_road(self, zoom, translation, angle):
bounds = PLAYFIELD
field = [
(bounds, bounds),
(bounds, -bounds),
(-bounds, -bounds),
(-bounds, bounds),
]
# draw background
self._draw_colored_polygon(
self.surf, field, self.bg_color, zoom, translation, angle, clip=False
)
# draw grass patches
grass = []
for x in range(-20, 20, 2):
for y in range(-20, 20, 2):
grass.append(
[
(GRASS_DIM * x + GRASS_DIM, GRASS_DIM * y + 0),
(GRASS_DIM * x + 0, GRASS_DIM * y + 0),
(GRASS_DIM * x + 0, GRASS_DIM * y + GRASS_DIM),
(GRASS_DIM * x + GRASS_DIM, GRASS_DIM * y + GRASS_DIM),
]
)
for poly in grass:
self._draw_colored_polygon(
self.surf, poly, self.grass_color, zoom, translation, angle
)
# draw road
for poly, color in self.road_poly:
# converting to pixel coordinates
poly = [(p[0], p[1]) for p in poly]
color = [int(c) for c in color]
self._draw_colored_polygon(self.surf, poly, color, zoom, translation, angle)
def _render_indicators(self, W, H):
s = W / 40.0
h = H / 40.0
color = (0, 0, 0)
polygon = [(W, H), (W, H - 5 * h), (0, H - 5 * h), (0, H)]
pygame.draw.polygon(self.surf, color=color, points=polygon)
def vertical_ind(place, val):
return [
(place * s, H - (h + h * val)),
((place + 1) * s, H - (h + h * val)),
((place + 1) * s, H - h),
((place + 0) * s, H - h),
]
def horiz_ind(place, val):
return [
((place + 0) * s, H - 4 * h),
((place + val) * s, H - 4 * h),
((place + val) * s, H - 2 * h),
((place + 0) * s, H - 2 * h),
]
assert self.car is not None
true_speed = np.sqrt(
np.square(self.car.hull.linearVelocity[0])
+ np.square(self.car.hull.linearVelocity[1])
)
# simple wrapper to render if the indicator value is above a threshold
def render_if_min(value, points, color):
if abs(value) > 1e-4:
pygame.draw.polygon(self.surf, points=points, color=color)
render_if_min(true_speed, vertical_ind(5, 0.02 * true_speed), (255, 255, 255))
# ABS sensors
render_if_min(
self.car.wheels[0].omega,
vertical_ind(7, 0.01 * self.car.wheels[0].omega),
(0, 0, 255),
)
render_if_min(
self.car.wheels[1].omega,
vertical_ind(8, 0.01 * self.car.wheels[1].omega),
(0, 0, 255),
)
render_if_min(
self.car.wheels[2].omega,
vertical_ind(9, 0.01 * self.car.wheels[2].omega),
(51, 0, 255),
)
render_if_min(
self.car.wheels[3].omega,
vertical_ind(10, 0.01 * self.car.wheels[3].omega),
(51, 0, 255),
)
render_if_min(
self.car.wheels[0].joint.angle,
horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle),
(0, 255, 0),
)
render_if_min(
self.car.hull.angularVelocity,
horiz_ind(30, -0.8 * self.car.hull.angularVelocity),
(255, 0, 0),
)
def _draw_colored_polygon(
self, surface, poly, color, zoom, translation, angle, clip=True
):
poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in poly]
poly = [
(c[0] * zoom + translation[0], c[1] * zoom + translation[1]) for c in poly
]
# This checks if the polygon is out of bounds of the screen, and we skip drawing if so.
# Instead of calculating exactly if the polygon and screen overlap,
# we simply check if the polygon is in a larger bounding box whose dimension
# is greater than the screen by MAX_SHAPE_DIM, which is the maximum
# diagonal length of an environment object
if not clip or any(
(-MAX_SHAPE_DIM <= coord[0] <= WINDOW_W + MAX_SHAPE_DIM)
and (-MAX_SHAPE_DIM <= coord[1] <= WINDOW_H + MAX_SHAPE_DIM)
for coord in poly
):
gfxdraw.aapolygon(self.surf, poly, color)
gfxdraw.filled_polygon(self.surf, poly, color)
def _create_image_array(self, screen, size):
scaled_screen = pygame.transform.smoothscale(screen, size)
return np.transpose(
np.array(pygame.surfarray.pixels3d(scaled_screen)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
pygame.display.quit()
self.isopen = False
pygame.quit()
if __name__ == "__main__":
a = np.array([0.0, 0.0, 0.0])
def register_input():
global quit, restart
for event in pygame.event.get():
if event.type == pygame.KEYDOWN:
if event.key == pygame.K_LEFT:
a[0] = -1.0
if event.key == pygame.K_RIGHT:
a[0] = +1.0
if event.key == pygame.K_UP:
a[1] = +1.0
if event.key == pygame.K_DOWN:
a[2] = +0.8 # set 1.0 for wheels to block to zero rotation
if event.key == pygame.K_RETURN:
restart = True
if event.key == pygame.K_ESCAPE:
quit = True
if event.type == pygame.KEYUP:
if event.key == pygame.K_LEFT:
a[0] = 0
if event.key == pygame.K_RIGHT:
a[0] = 0
if event.key == pygame.K_UP:
a[1] = 0
if event.key == pygame.K_DOWN:
a[2] = 0
if event.type == pygame.QUIT:
quit = True
env = CarRacing(render_mode="human")
quit = False
while not quit:
env.reset()
total_reward = 0.0
steps = 0
restart = False
while True:
register_input()
s, r, terminated, truncated, info = env.step(a)
total_reward += r
if steps % 200 == 0 or terminated or truncated:
print("\naction " + str([f"{x:+0.2f}" for x in a]))
print(f"step {steps} total_reward {total_reward:+0.2f}")
steps += 1
if terminated or truncated or restart or quit:
break
env.close()

View File

@ -0,0 +1,894 @@
__credits__ = ["Andrea PIERRÉ"]
import math
import warnings
from typing import TYPE_CHECKING, Optional
import numpy as np
import gymnasium as gym
from gymnasium import error, spaces
from gymnasium.error import DependencyNotInstalled
from gymnasium.utils import EzPickle, colorize
from gymnasium.utils.step_api_compatibility import step_api_compatibility
try:
import Box2D
from Box2D.b2 import (
circleShape,
contactListener,
edgeShape,
fixtureDef,
polygonShape,
revoluteJointDef,
)
except ImportError as e:
raise DependencyNotInstalled(
"Box2D is not installed, run `pip install gymnasium[box2d]`"
) from e
if TYPE_CHECKING:
import pygame
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
LANDER_POLY = [(-14, +17), (-17, 0), (-17, -10), (+17, -10), (+17, 0), (+14, +17)]
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
LEG_SPRING_TORQUE = 40
SIDE_ENGINE_HEIGHT = 14
SIDE_ENGINE_AWAY = 12
MAIN_ENGINE_Y_LOCATION = (
4 # The Y location of the main engine on the body of the Lander.
)
VIEWPORT_W = 600
VIEWPORT_H = 400
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if (
self.env.lander == contact.fixtureA.body
or self.env.lander == contact.fixtureB.body
):
self.env.game_over = True
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = True
def EndContact(self, contact):
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = False
class LunarLander(gym.Env, EzPickle):
"""
## Description
This environment is a classic rocket trajectory optimization problem.
According to Pontryagin's maximum principle, it is optimal to fire the
engine at full throttle or turn it off. This is the reason why this
environment has discrete actions: engine on or off.
There are two environment versions: discrete or continuous.
The landing pad is always at coordinates (0,0). The coordinates are the
first two numbers in the state vector.
Landing outside of the landing pad is possible. Fuel is infinite, so an agent
can learn to fly and then land on its first attempt.
To see a heuristic landing, run:
```
python gymnasium/envs/box2d/lunar_lander.py
```
<!-- To play yourself, run: -->
<!-- python examples/agents/keyboard_agent.py LunarLander-v2 -->
## Action Space
There are four discrete actions available:
- 0: do nothing
- 1: fire left orientation engine
- 2: fire main engine
- 3: fire right orientation engine
## Observation Space
The state is an 8-dimensional vector: the coordinates of the lander in `x` & `y`, its linear
velocities in `x` & `y`, its angle, its angular velocity, and two booleans
that represent whether each leg is in contact with the ground or not.
## Rewards
After every step a reward is granted. The total reward of an episode is the
sum of the rewards for all the steps within that episode.
For each step, the reward:
- is increased/decreased the closer/further the lander is to the landing pad.
- is increased/decreased the slower/faster the lander is moving.
- is decreased the more the lander is tilted (angle not horizontal).
- is increased by 10 points for each leg that is in contact with the ground.
- is decreased by 0.03 points each frame a side engine is firing.
- is decreased by 0.3 points each frame the main engine is firing.
The episode receive an additional reward of -100 or +100 points for crashing or landing safely respectively.
An episode is considered a solution if it scores at least 200 points.
## Starting State
The lander starts at the top center of the viewport with a random initial
force applied to its center of mass.
## Episode Termination
The episode finishes if:
1) the lander crashes (the lander body gets in contact with the moon);
2) the lander gets outside of the viewport (`x` coordinate is greater than 1);
3) the lander is not awake. From the [Box2D docs](https://box2d.org/documentation/md__d_1__git_hub_box2d_docs_dynamics.html#autotoc_md61),
a body which is not awake is a body which doesn't move and doesn't
collide with any other body:
> When Box2D determines that a body (or group of bodies) has come to rest,
> the body enters a sleep state which has very little CPU overhead. If a
> body is awake and collides with a sleeping body, then the sleeping body
> wakes up. Bodies will also wake up if a joint or contact attached to
> them is destroyed.
## Arguments
To use to the _continuous_ environment, you need to specify the
`continuous=True` argument like below:
```python
import gymnasium as gym
env = gym.make(
"LunarLander-v2",
continuous: bool = False,
gravity: float = -10.0,
enable_wind: bool = False,
wind_power: float = 15.0,
turbulence_power: float = 1.5,
)
```
If `continuous=True` is passed, continuous actions (corresponding to the throttle of the engines) will be used and the
action space will be `Box(-1, +1, (2,), dtype=np.float32)`.
The first coordinate of an action determines the throttle of the main engine, while the second
coordinate specifies the throttle of the lateral boosters.
Given an action `np.array([main, lateral])`, the main engine will be turned off completely if
`main < 0` and the throttle scales affinely from 50% to 100% for `0 <= main <= 1` (in particular, the
main engine doesn't work with less than 50% power).
Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively).
`gravity` dictates the gravitational constant, this is bounded to be within 0 and -12.
If `enable_wind=True` is passed, there will be wind effects applied to the lander.
The wind is generated using the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))`.
`k` is set to 0.01.
`C` is sampled randomly between -9999 and 9999.
`wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for `wind_power` is between 0.0 and 20.0.
`turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft. The recommended value for `turbulence_power` is between 0.0 and 2.0.
## Version History
- v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters
- v1: Legs contact with ground added in state vector; contact with ground
give +10 reward points, and -10 if then lose contact; reward
renormalized to 200; harder initial random push.
- v0: Initial version
## Notes
There are several unexpected bugs with the implementation of the environment.
1. The position of the side thursters on the body of the lander changes, depending on the orientation of the lander.
This in turn results in an orientation depentant torque being applied to the lander.
2. The units of the state are not consistent. I.e.
* The angular velocity is in units of 0.4 radians per second. In order to convert to radians per second, the value needs to be multiplied by a factor of 2.5.
For the default values of VIEWPORT_W, VIEWPORT_H, SCALE, and FPS, the scale factors equal:
'x': 10
'y': 6.666
'vx': 5
'vy': 7.5
'angle': 1
'angular velocity': 2.5
After the correction has been made, the units of the state are as follows:
'x': (units)
'y': (units)
'vx': (units/second)
'vy': (units/second)
'angle': (radians)
'angular velocity': (radians/second)
<!-- ## References -->
## Credits
Created by Oleg Klimov
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": FPS,
}
def __init__(
self,
render_mode: Optional[str] = None,
continuous: bool = False,
gravity: float = -10.0,
enable_wind: bool = False,
wind_power: float = 15.0,
turbulence_power: float = 1.5,
):
EzPickle.__init__(
self,
render_mode,
continuous,
gravity,
enable_wind,
wind_power,
turbulence_power,
)
assert (
-12.0 < gravity and gravity < 0.0
), f"gravity (current value: {gravity}) must be between -12 and 0"
self.gravity = gravity
if 0.0 > wind_power or wind_power > 20.0:
warnings.warn(
colorize(
f"WARN: wind_power value is recommended to be between 0.0 and 20.0, (current value: {wind_power})",
"yellow",
),
)
self.wind_power = wind_power
if 0.0 > turbulence_power or turbulence_power > 2.0:
warnings.warn(
colorize(
f"WARN: turbulence_power value is recommended to be between 0.0 and 2.0, (current value: {turbulence_power})",
"yellow",
),
)
self.turbulence_power = turbulence_power
self.enable_wind = enable_wind
self.wind_idx = np.random.randint(-9999, 9999)
self.torque_idx = np.random.randint(-9999, 9999)
self.screen: pygame.Surface = None
self.clock = None
self.isopen = True
self.world = Box2D.b2World(gravity=(0, gravity))
self.moon = None
self.lander: Optional[Box2D.b2Body] = None
self.particles = []
self.prev_reward = None
self.continuous = continuous
low = np.array(
[
# these are bounds for position
# realistically the environment should have ended
# long before we reach more than 50% outside
-1.5,
-1.5,
# velocity bounds is 5x rated speed
-5.0,
-5.0,
-math.pi,
-5.0,
-0.0,
-0.0,
]
).astype(np.float32)
high = np.array(
[
# these are bounds for position
# realistically the environment should have ended
# long before we reach more than 50% outside
1.5,
1.5,
# velocity bounds is 5x rated speed
5.0,
5.0,
math.pi,
5.0,
1.0,
1.0,
]
).astype(np.float32)
# useful range is -1 .. +1, but spikes can be higher
self.observation_space = spaces.Box(low, high)
if self.continuous:
# Action is two floats [main engine, left-right engines].
# Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
# Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32)
else:
# Nop, fire left engine, main engine, right engine
self.action_space = spaces.Discrete(4)
self.render_mode = render_mode
def _destroy(self):
if not self.moon:
return
self.world.contactListener = None
self._clean_particles(True)
self.world.DestroyBody(self.moon)
self.moon = None
self.world.DestroyBody(self.lander)
self.lander = None
self.world.DestroyBody(self.legs[0])
self.world.DestroyBody(self.legs[1])
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
self._destroy()
self.world.contactListener_keepref = ContactDetector(self)
self.world.contactListener = self.world.contactListener_keepref
self.game_over = False
self.prev_shaping = None
W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE
# Create Terrain
CHUNKS = 11
height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1,))
chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS // 2 - 1]
self.helipad_x2 = chunk_x[CHUNKS // 2 + 1]
self.helipad_y = H / 4
height[CHUNKS // 2 - 2] = self.helipad_y
height[CHUNKS // 2 - 1] = self.helipad_y
height[CHUNKS // 2 + 0] = self.helipad_y
height[CHUNKS // 2 + 1] = self.helipad_y
height[CHUNKS // 2 + 2] = self.helipad_y
smooth_y = [
0.33 * (height[i - 1] + height[i + 0] + height[i + 1])
for i in range(CHUNKS)
]
self.moon = self.world.CreateStaticBody(
shapes=edgeShape(vertices=[(0, 0), (W, 0)])
)
self.sky_polys = []
for i in range(CHUNKS - 1):
p1 = (chunk_x[i], smooth_y[i])
p2 = (chunk_x[i + 1], smooth_y[i + 1])
self.moon.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1)
self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])
self.moon.color1 = (0.0, 0.0, 0.0)
self.moon.color2 = (0.0, 0.0, 0.0)
# Create Lander body
initial_y = VIEWPORT_H / SCALE
initial_x = VIEWPORT_W / SCALE / 2
self.lander: Box2D.b2Body = self.world.CreateDynamicBody(
position=(initial_x, initial_y),
angle=0.0,
fixtures=fixtureDef(
shape=polygonShape(
vertices=[(x / SCALE, y / SCALE) for x, y in LANDER_POLY]
),
density=5.0,
friction=0.1,
categoryBits=0x0010,
maskBits=0x001, # collide only with ground
restitution=0.0,
), # 0.99 bouncy
)
self.lander.color1 = (128, 102, 230)
self.lander.color2 = (77, 77, 128)
# Apply the initial random impulse to the lander
self.lander.ApplyForceToCenter(
(
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
),
True,
)
# Create Lander Legs
self.legs = []
for i in [-1, +1]:
leg = self.world.CreateDynamicBody(
position=(initial_x - i * LEG_AWAY / SCALE, initial_y),
angle=(i * 0.05),
fixtures=fixtureDef(
shape=polygonShape(box=(LEG_W / SCALE, LEG_H / SCALE)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001,
),
)
leg.ground_contact = False
leg.color1 = (128, 102, 230)
leg.color2 = (77, 77, 128)
rjd = revoluteJointDef(
bodyA=self.lander,
bodyB=leg,
localAnchorA=(0, 0),
localAnchorB=(i * LEG_AWAY / SCALE, LEG_DOWN / SCALE),
enableMotor=True,
enableLimit=True,
maxMotorTorque=LEG_SPRING_TORQUE,
motorSpeed=+0.3 * i, # low enough not to jump back into the sky
)
if i == -1:
rjd.lowerAngle = (
+0.9 - 0.5
) # The most esoteric numbers here, angled legs have freedom to travel within
rjd.upperAngle = +0.9
else:
rjd.lowerAngle = -0.9
rjd.upperAngle = -0.9 + 0.5
leg.joint = self.world.CreateJoint(rjd)
self.legs.append(leg)
self.drawlist = [self.lander] + self.legs
if self.render_mode == "human":
self.render()
return self.step(np.array([0, 0]) if self.continuous else 0)[0], {}
def _create_particle(self, mass, x, y, ttl):
p = self.world.CreateDynamicBody(
position=(x, y),
angle=0.0,
fixtures=fixtureDef(
shape=circleShape(radius=2 / SCALE, pos=(0, 0)),
density=mass,
friction=0.1,
categoryBits=0x0100,
maskBits=0x001, # collide only with ground
restitution=0.3,
),
)
p.ttl = ttl
self.particles.append(p)
self._clean_particles(False)
return p
def _clean_particles(self, all):
while self.particles and (all or self.particles[0].ttl < 0):
self.world.DestroyBody(self.particles.pop(0))
def step(self, action):
assert self.lander is not None
# Update wind and apply to the lander
assert self.lander is not None, "You forgot to call reset()"
if self.enable_wind and not (
self.legs[0].ground_contact or self.legs[1].ground_contact
):
# the function used for wind is tanh(sin(2 k x) + sin(pi k x)),
# which is proven to never be periodic, k = 0.01
wind_mag = (
math.tanh(
math.sin(0.02 * self.wind_idx)
+ (math.sin(math.pi * 0.01 * self.wind_idx))
)
* self.wind_power
)
self.wind_idx += 1
self.lander.ApplyForceToCenter(
(wind_mag, 0.0),
True,
)
# the function used for torque is tanh(sin(2 k x) + sin(pi k x)),
# which is proven to never be periodic, k = 0.01
torque_mag = math.tanh(
math.sin(0.02 * self.torque_idx)
+ (math.sin(math.pi * 0.01 * self.torque_idx))
) * (self.turbulence_power)
self.torque_idx += 1
self.lander.ApplyTorque(
(torque_mag),
True,
)
if self.continuous:
action = np.clip(action, -1, +1).astype(np.float32)
else:
assert self.action_space.contains(
action
), f"{action!r} ({type(action)}) invalid "
# Apply Engine Impulses
# Tip is a the (X and Y) components of the rotation of the lander.
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
# Side is the (-Y and X) components of the rotation of the lander.
side = (-tip[1], tip[0])
# Generate two random numbers between -1/SCALE and 1/SCALE.
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
if (self.continuous and action[0] > 0.0) or (
not self.continuous and action == 2
):
# Main engine
if self.continuous:
m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 # 0.5..1.0
assert m_power >= 0.5 and m_power <= 1.0
else:
m_power = 1.0
# 4 is move a bit downwards, +-2 for randomness
# The components of the impulse to be applied by the main engine.
ox = (
tip[0] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0])
+ side[0] * dispersion[1]
)
oy = (
-tip[1] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0])
- side[1] * dispersion[1]
)
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
if self.render_mode is not None:
# particles are just a decoration, with no impact on the physics, so don't add them when not rendering
p = self._create_particle(
3.5, # 3.5 is here to make particle speed adequate
impulse_pos[0],
impulse_pos[1],
m_power,
)
p.ApplyLinearImpulse(
(
ox * MAIN_ENGINE_POWER * m_power,
oy * MAIN_ENGINE_POWER * m_power,
),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True,
)
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (
not self.continuous and action in [1, 3]
):
# Orientation/Side engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
assert s_power >= 0.5 and s_power <= 1.0
else:
# action = 1 is left, action = 3 is right
direction = action - 2
s_power = 1.0
# The components of the impulse to be applied by the side engines.
ox = tip[0] * dispersion[0] + side[0] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
oy = -tip[1] * dispersion[0] - side[1] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
# The constant 17 is a constant, that is presumably meant to be SIDE_ENGINE_HEIGHT.
# However, SIDE_ENGINE_HEIGHT is defined as 14
# This casuses the position of the thurst on the body of the lander to change, depending on the orientation of the lander.
# This in turn results in an orientation depentant torque being applied to the lander.
impulse_pos = (
self.lander.position[0] + ox - tip[0] * 17 / SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
)
if self.render_mode is not None:
# particles are just a decoration, with no impact on the physics, so don't add them when not rendering
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse(
(
ox * SIDE_ENGINE_POWER * s_power,
oy * SIDE_ENGINE_POWER * s_power,
),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True,
)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
pos = self.lander.position
vel = self.lander.linearVelocity
state = [
(pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
(pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
self.lander.angle,
20.0 * self.lander.angularVelocity / FPS,
1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0,
]
assert len(state) == 8
reward = 0
shaping = (
-100 * np.sqrt(state[0] * state[0] + state[1] * state[1])
- 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])
- 100 * abs(state[4])
+ 10 * state[6]
+ 10 * state[7]
) # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
reward -= (
m_power * 0.30
) # less fuel spent is better, about -30 for heuristic landing
reward -= s_power * 0.03
terminated = False
if self.game_over or abs(state[0]) >= 1.0:
terminated = True
reward = -100
if not self.lander.awake:
terminated = True
reward = +100
if self.render_mode == "human":
self.render()
return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[box2d]`"
) from e
if self.screen is None and self.render_mode == "human":
pygame.init()
pygame.display.init()
self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
if self.clock is None:
self.clock = pygame.time.Clock()
self.surf = pygame.Surface((VIEWPORT_W, VIEWPORT_H))
pygame.transform.scale(self.surf, (SCALE, SCALE))
pygame.draw.rect(self.surf, (255, 255, 255), self.surf.get_rect())
for obj in self.particles:
obj.ttl -= 0.15
obj.color1 = (
int(max(0.2, 0.15 + obj.ttl) * 255),
int(max(0.2, 0.5 * obj.ttl) * 255),
int(max(0.2, 0.5 * obj.ttl) * 255),
)
obj.color2 = (
int(max(0.2, 0.15 + obj.ttl) * 255),
int(max(0.2, 0.5 * obj.ttl) * 255),
int(max(0.2, 0.5 * obj.ttl) * 255),
)
self._clean_particles(False)
for p in self.sky_polys:
scaled_poly = []
for coord in p:
scaled_poly.append((coord[0] * SCALE, coord[1] * SCALE))
pygame.draw.polygon(self.surf, (0, 0, 0), scaled_poly)
gfxdraw.aapolygon(self.surf, scaled_poly, (0, 0, 0))
for obj in self.particles + self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
pygame.draw.circle(
self.surf,
color=obj.color1,
center=trans * f.shape.pos * SCALE,
radius=f.shape.radius * SCALE,
)
pygame.draw.circle(
self.surf,
color=obj.color2,
center=trans * f.shape.pos * SCALE,
radius=f.shape.radius * SCALE,
)
else:
path = [trans * v * SCALE for v in f.shape.vertices]
pygame.draw.polygon(self.surf, color=obj.color1, points=path)
gfxdraw.aapolygon(self.surf, path, obj.color1)
pygame.draw.aalines(
self.surf, color=obj.color2, points=path, closed=True
)
for x in [self.helipad_x1, self.helipad_x2]:
x = x * SCALE
flagy1 = self.helipad_y * SCALE
flagy2 = flagy1 + 50
pygame.draw.line(
self.surf,
color=(255, 255, 255),
start_pos=(x, flagy1),
end_pos=(x, flagy2),
width=1,
)
pygame.draw.polygon(
self.surf,
color=(204, 204, 0),
points=[
(x, flagy2),
(x, flagy2 - 10),
(x + 25, flagy2 - 5),
],
)
gfxdraw.aapolygon(
self.surf,
[(x, flagy2), (x, flagy2 - 10), (x + 25, flagy2 - 5)],
(204, 204, 0),
)
self.surf = pygame.transform.flip(self.surf, False, True)
if self.render_mode == "human":
assert self.screen is not None
self.screen.blit(self.surf, (0, 0))
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False
def heuristic(env, s):
"""
The heuristic for
1. Testing
2. Demonstration rollout.
Args:
env: The environment
s (list): The state. Attributes:
s[0] is the horizontal coordinate
s[1] is the vertical coordinate
s[2] is the horizontal speed
s[3] is the vertical speed
s[4] is the angle
s[5] is the angular speed
s[6] 1 if first leg has contact, else 0
s[7] 1 if second leg has contact, else 0
Returns:
a: The heuristic to be fed into the step function defined above to determine the next step and reward.
"""
angle_targ = s[0] * 0.5 + s[2] * 1.0 # angle should point towards center
if angle_targ > 0.4:
angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
if angle_targ < -0.4:
angle_targ = -0.4
hover_targ = 0.55 * np.abs(
s[0]
) # target y should be proportional to horizontal offset
angle_todo = (angle_targ - s[4]) * 0.5 - (s[5]) * 1.0
hover_todo = (hover_targ - s[1]) * 0.5 - (s[3]) * 0.5
if s[6] or s[7]: # legs have contact
angle_todo = 0
hover_todo = (
-(s[3]) * 0.5
) # override to reduce fall speed, that's all we need after contact
if env.continuous:
a = np.array([hover_todo * 20 - 1, -angle_todo * 20])
a = np.clip(a, -1, +1)
else:
a = 0
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
a = 2
elif angle_todo < -0.05:
a = 3
elif angle_todo > +0.05:
a = 1
return a
def demo_heuristic_lander(env, seed=None, render=False):
total_reward = 0
steps = 0
s, info = env.reset(seed=seed)
while True:
a = heuristic(env, s)
s, r, terminated, truncated, info = step_api_compatibility(env.step(a), True)
total_reward += r
if render:
still_open = env.render()
if still_open is False:
break
if steps % 20 == 0 or terminated or truncated:
print("observations:", " ".join([f"{x:+0.2f}" for x in s]))
print(f"step {steps} total_reward {total_reward:+0.2f}")
steps += 1
if terminated or truncated:
break
if render:
env.close()
return total_reward
class LunarLanderContinuous:
def __init__(self):
raise error.Error(
"Error initializing LunarLanderContinuous Environment.\n"
"Currently, we do not support initializing this mode of environment by calling the class directly.\n"
"To use this environment, instead create it by specifying the continuous keyword in gym.make, i.e.\n"
'gym.make("LunarLander-v2", continuous=True)'
)
if __name__ == "__main__":
env = gym.make("LunarLander-v2", render_mode="rgb_array")
demo_heuristic_lander(env, render=True)

View File

@ -0,0 +1,7 @@
from gymnasium.envs.classic_control.acrobot import AcrobotEnv
from gymnasium.envs.classic_control.cartpole import CartPoleEnv
from gymnasium.envs.classic_control.continuous_mountain_car import (
Continuous_MountainCarEnv,
)
from gymnasium.envs.classic_control.mountain_car import MountainCarEnv
from gymnasium.envs.classic_control.pendulum import PendulumEnv

View File

@ -0,0 +1,470 @@
"""classic Acrobot task"""
from typing import Optional
import numpy as np
from numpy import cos, pi, sin
import gymnasium as gym
from gymnasium import Env, spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = [
"Alborz Geramifard",
"Robert H. Klein",
"Christoph Dann",
"William Dabney",
"Jonathan P. How",
]
__license__ = "BSD 3-Clause"
__author__ = "Christoph Dann <cdann@cdann.de>"
# SOURCE:
# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
class AcrobotEnv(Env):
"""
## Description
The Acrobot environment is based on Sutton's work in
["Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding"](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html)
and [Sutton and Barto's book](http://www.incompleteideas.net/book/the-book-2nd.html).
The system consists of two links connected linearly to form a chain, with one end of
the chain fixed. The joint between the two links is actuated. The goal is to apply
torques on the actuated joint to swing the free end of the linear chain above a
given height while starting from the initial state of hanging downwards.
As seen in the **Gif**: two blue links connected by two green joints. The joint in
between the two links is actuated. The goal is to swing the free end of the outer-link
to reach the target height (black horizontal line above system) by applying torque on
the actuator.
## Action Space
The action is discrete, deterministic, and represents the torque applied on the actuated
joint between the two links.
| Num | Action | Unit |
|-----|---------------------------------------|--------------|
| 0 | apply -1 torque to the actuated joint | torque (N m) |
| 1 | apply 0 torque to the actuated joint | torque (N m) |
| 2 | apply 1 torque to the actuated joint | torque (N m) |
## Observation Space
The observation is a `ndarray` with shape `(6,)` that provides information about the
two rotational joint angles as well as their angular velocities:
| Num | Observation | Min | Max |
|-----|------------------------------|---------------------|-------------------|
| 0 | Cosine of `theta1` | -1 | 1 |
| 1 | Sine of `theta1` | -1 | 1 |
| 2 | Cosine of `theta2` | -1 | 1 |
| 3 | Sine of `theta2` | -1 | 1 |
| 4 | Angular velocity of `theta1` | ~ -12.567 (-4 * pi) | ~ 12.567 (4 * pi) |
| 5 | Angular velocity of `theta2` | ~ -28.274 (-9 * pi) | ~ 28.274 (9 * pi) |
where
- `theta1` is the angle of the first joint, where an angle of 0 indicates the first link is pointing directly
downwards.
- `theta2` is ***relative to the angle of the first link.***
An angle of 0 corresponds to having the same angle between the two links.
The angular velocities of `theta1` and `theta2` are bounded at ±4π, and ±9π rad/s respectively.
A state of `[1, 0, 1, 0, ..., ...]` indicates that both links are pointing downwards.
## Rewards
The goal is to have the free end reach a designated target height in as few steps as possible,
and as such all steps that do not reach the goal incur a reward of -1.
Achieving the target height results in termination with a reward of 0. The reward threshold is -100.
## Starting State
Each parameter in the underlying state (`theta1`, `theta2`, and the two angular velocities) is initialized
uniformly between -0.1 and 0.1. This means both links are pointing downwards with some initial stochasticity.
## Episode End
The episode ends if one of the following occurs:
1. Termination: The free end reaches the target height, which is constructed as:
`-cos(theta1) - cos(theta2 + theta1) > 1.0`
2. Truncation: Episode length is greater than 500 (200 for v0)
## Arguments
No additional arguments are currently supported during construction.
```python
import gymnasium as gym
env = gym.make('Acrobot-v1')
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
By default, the dynamics of the acrobot follow those described in Sutton and Barto's book
[Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html).
However, a `book_or_nips` parameter can be modified to change the pendulum dynamics to those described
in the original [NeurIPS paper](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html).
```python
# To change the dynamics as described above
env.unwrapped.book_or_nips = 'nips'
```
See the following note for details:
> The dynamics equations were missing some terms in the NIPS paper which
are present in the book. R. Sutton confirmed in personal correspondence
that the experimental results shown in the paper and the book were
generated with the equations shown in the book.
However, there is the option to run the domain with the paper equations
by setting `book_or_nips = 'nips'`
## Version History
- v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of
`theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the
sine and cosine of each angle instead.
- v0: Initial versions release (1.0.0) (removed from gymnasium for v1)
## References
- Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
In D. Touretzky, M. C. Mozer, & M. Hasselmo (Eds.), Advances in Neural Information Processing Systems (Vol. 8).
MIT Press. https://proceedings.neurips.cc/paper/1995/file/8f1d43620bc6bb580df6e80b0dc05c48-Paper.pdf
- Sutton, R. S., Barto, A. G. (2018 ). Reinforcement Learning: An Introduction. The MIT Press.
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 15,
}
dt = 0.2
LINK_LENGTH_1 = 1.0 # [m]
LINK_LENGTH_2 = 1.0 # [m]
LINK_MASS_1 = 1.0 #: [kg] mass of link 1
LINK_MASS_2 = 1.0 #: [kg] mass of link 2
LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1
LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2
LINK_MOI = 1.0 #: moments of inertia for both links
MAX_VEL_1 = 4 * pi
MAX_VEL_2 = 9 * pi
AVAIL_TORQUE = [-1.0, 0.0, +1]
torque_noise_max = 0.0
SCREEN_DIM = 500
#: use dynamics equations from the nips paper or the book
book_or_nips = "book"
action_arrow = None
domain_fig = None
actions_num = 3
def __init__(self, render_mode: Optional[str] = None):
self.render_mode = render_mode
self.screen = None
self.clock = None
self.isopen = True
high = np.array(
[1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32
)
low = -high
self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
self.action_space = spaces.Discrete(3)
self.state = None
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
low, high = utils.maybe_parse_reset_bounds(
options, -0.1, 0.1 # default low
) # default high
self.state = self.np_random.uniform(low=low, high=high, size=(4,)).astype(
np.float32
)
if self.render_mode == "human":
self.render()
return self._get_ob(), {}
def step(self, a):
s = self.state
assert s is not None, "Call reset before using AcrobotEnv object."
torque = self.AVAIL_TORQUE[a]
# Add noise to the force action
if self.torque_noise_max > 0:
torque += self.np_random.uniform(
-self.torque_noise_max, self.torque_noise_max
)
# Now, augment the state with our force action so it can be passed to
# _dsdt
s_augmented = np.append(s, torque)
ns = rk4(self._dsdt, s_augmented, [0, self.dt])
ns[0] = wrap(ns[0], -pi, pi)
ns[1] = wrap(ns[1], -pi, pi)
ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
self.state = ns
terminated = self._terminal()
reward = -1.0 if not terminated else 0.0
if self.render_mode == "human":
self.render()
return (self._get_ob(), reward, terminated, False, {})
def _get_ob(self):
s = self.state
assert s is not None, "Call reset before using AcrobotEnv object."
return np.array(
[cos(s[0]), sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]], dtype=np.float32
)
def _terminal(self):
s = self.state
assert s is not None, "Call reset before using AcrobotEnv object."
return bool(-cos(s[0]) - cos(s[1] + s[0]) > 1.0)
def _dsdt(self, s_augmented):
m1 = self.LINK_MASS_1
m2 = self.LINK_MASS_2
l1 = self.LINK_LENGTH_1
lc1 = self.LINK_COM_POS_1
lc2 = self.LINK_COM_POS_2
I1 = self.LINK_MOI
I2 = self.LINK_MOI
g = 9.8
a = s_augmented[-1]
s = s_augmented[:-1]
theta1 = s[0]
theta2 = s[1]
dtheta1 = s[2]
dtheta2 = s[3]
d1 = (
m1 * lc1**2
+ m2 * (l1**2 + lc2**2 + 2 * l1 * lc2 * cos(theta2))
+ I1
+ I2
)
d2 = m2 * (lc2**2 + l1 * lc2 * cos(theta2)) + I2
phi2 = m2 * lc2 * g * cos(theta1 + theta2 - pi / 2.0)
phi1 = (
-m2 * l1 * lc2 * dtheta2**2 * sin(theta2)
- 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * sin(theta2)
+ (m1 * lc1 + m2 * l1) * g * cos(theta1 - pi / 2)
+ phi2
)
if self.book_or_nips == "nips":
# the following line is consistent with the description in the
# paper
ddtheta2 = (a + d2 / d1 * phi1 - phi2) / (m2 * lc2**2 + I2 - d2**2 / d1)
else:
# the following line is consistent with the java implementation and the
# book
ddtheta2 = (
a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1**2 * sin(theta2) - phi2
) / (m2 * lc2**2 + I2 - d2**2 / d1)
ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.SCREEN_DIM, self.SCREEN_DIM)
)
else: # mode in "rgb_array"
self.screen = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM))
if self.clock is None:
self.clock = pygame.time.Clock()
surf = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM))
surf.fill((255, 255, 255))
s = self.state
bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default
scale = self.SCREEN_DIM / (bound * 2)
offset = self.SCREEN_DIM / 2
if s is None:
return None
p1 = [
-self.LINK_LENGTH_1 * cos(s[0]) * scale,
self.LINK_LENGTH_1 * sin(s[0]) * scale,
]
p2 = [
p1[0] - self.LINK_LENGTH_2 * cos(s[0] + s[1]) * scale,
p1[1] + self.LINK_LENGTH_2 * sin(s[0] + s[1]) * scale,
]
xys = np.array([[0, 0], p1, p2])[:, ::-1]
thetas = [s[0] - pi / 2, s[0] + s[1] - pi / 2]
link_lengths = [self.LINK_LENGTH_1 * scale, self.LINK_LENGTH_2 * scale]
pygame.draw.line(
surf,
start_pos=(-2.2 * scale + offset, 1 * scale + offset),
end_pos=(2.2 * scale + offset, 1 * scale + offset),
color=(0, 0, 0),
)
for (x, y), th, llen in zip(xys, thetas, link_lengths):
x = x + offset
y = y + offset
l, r, t, b = 0, llen, 0.1 * scale, -0.1 * scale
coords = [(l, b), (l, t), (r, t), (r, b)]
transformed_coords = []
for coord in coords:
coord = pygame.math.Vector2(coord).rotate_rad(th)
coord = (coord[0] + x, coord[1] + y)
transformed_coords.append(coord)
gfxdraw.aapolygon(surf, transformed_coords, (0, 204, 204))
gfxdraw.filled_polygon(surf, transformed_coords, (0, 204, 204))
gfxdraw.aacircle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0))
gfxdraw.filled_circle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0))
surf = pygame.transform.flip(surf, False, True)
self.screen.blit(surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False
def wrap(x, m, M):
"""Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
Args:
x: a scalar
m: minimum possible value in range
M: maximum possible value in range
Returns:
x: a scalar, wrapped
"""
diff = M - m
while x > M:
x = x - diff
while x < m:
x = x + diff
return x
def bound(x, m, M=None):
"""Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
Args:
x: scalar
m: The lower bound
M: The upper bound
Returns:
x: scalar, bound between min (m) and Max (M)
"""
if M is None:
M = m[1]
m = m[0]
# bound x between min (m) and Max (M)
return min(max(x, m), M)
def rk4(derivs, y0, t):
"""
Integrate 1-D or N-D system of ODEs using 4-th order Runge-Kutta.
Example for 2D system:
>>> def derivs(x):
... d1 = x[0] + 2*x[1]
... d2 = -3*x[0] + 4*x[1]
... return d1, d2
>>> dt = 0.0005
>>> t = np.arange(0.0, 2.0, dt)
>>> y0 = (1,2)
>>> yout = rk4(derivs, y0, t)
Args:
derivs: the derivative of the system and has the signature ``dy = derivs(yi)``
y0: initial state vector
t: sample times
Returns:
yout: Runge-Kutta approximation of the ODE
"""
try:
Ny = len(y0)
except TypeError:
yout = np.zeros((len(t),), np.float_)
else:
yout = np.zeros((len(t), Ny), np.float_)
yout[0] = y0
for i in np.arange(len(t) - 1):
this = t[i]
dt = t[i + 1] - this
dt2 = dt / 2.0
y0 = yout[i]
k1 = np.asarray(derivs(y0))
k2 = np.asarray(derivs(y0 + dt2 * k1))
k3 = np.asarray(derivs(y0 + dt2 * k2))
k4 = np.asarray(derivs(y0 + dt * k3))
yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
# We only care about the final timestep and we cleave off action value which will be zero
return yout[-1][:4]

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

View File

@ -0,0 +1,34 @@
[remap]
importer="texture"
type="CompressedTexture2D"
uid="uid://dk5m5liwecsk"
path="res://.godot/imported/clockwise.png-0b6c4e15d302e93ea5c480ebf7734edb.ctex"
metadata={
"vram_texture": false
}
[deps]
source_file="res://rl/Lib/site-packages/gymnasium/envs/classic_control/assets/clockwise.png"
dest_files=["res://.godot/imported/clockwise.png-0b6c4e15d302e93ea5c480ebf7734edb.ctex"]
[params]
compress/mode=0
compress/high_quality=false
compress/lossy_quality=0.7
compress/hdr_compression=1
compress/normal_map=0
compress/channel_pack=0
mipmaps/generate=false
mipmaps/limit=-1
roughness/mode=0
roughness/src_normal=""
process/fix_alpha_border=true
process/premult_alpha=false
process/normal_map_invert_y=false
process/hdr_as_srgb=false
process/hdr_clamp_exposure=false
process/size_limit=0
detect_3d/compress_to=1

View File

@ -0,0 +1,577 @@
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
"""
import math
from typing import Optional, Tuple, Union
import numpy as np
import gymnasium as gym
from gymnasium import logger, spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
from gymnasium.experimental.vector import VectorEnv
from gymnasium.vector.utils import batch_space
class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
"""
## Description
This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson in
["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077).
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces
in the left and right direction on the cart.
## Action Space
The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction
of the fixed force the cart is pushed with.
- 0: Push cart to the left
- 1: Push cart to the right
**Note**: The velocity that is reduced or increased by the applied force is not fixed and it depends on the angle
the pole is pointing. The center of gravity of the pole varies the amount of energy needed to move the cart underneath it
## Observation Space
The observation is a `ndarray` with shape `(4,)` with the values corresponding to the following positions and velocities:
| Num | Observation | Min | Max |
|-----|-----------------------|---------------------|-------------------|
| 0 | Cart Position | -4.8 | 4.8 |
| 1 | Cart Velocity | -Inf | Inf |
| 2 | Pole Angle | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
| 3 | Pole Angular Velocity | -Inf | Inf |
**Note:** While the ranges above denote the possible values for observation space of each element,
it is not reflective of the allowed values of the state space in an unterminated episode. Particularly:
- The cart x-position (index 0) can be take values between `(-4.8, 4.8)`, but the episode terminates
if the cart leaves the `(-2.4, 2.4)` range.
- The pole angle can be observed between `(-.418, .418)` radians (or **±24°**), but the episode terminates
if the pole angle is not in the range `(-.2095, .2095)` (or **±12°**)
## Rewards
Since the goal is to keep the pole upright for as long as possible, a reward of `+1` for every step taken,
including the termination step, is allotted. The threshold for rewards is 500 for v1 and 200 for v0.
## Starting State
All observations are assigned a uniformly random value in `(-0.05, 0.05)`
## Episode End
The episode ends if any one of the following occurs:
1. Termination: Pole Angle is greater than ±12°
2. Termination: Cart Position is greater than ±2.4 (center of the cart reaches the edge of the display)
3. Truncation: Episode length is greater than 500 (200 for v0)
## Arguments
```python
import gymnasium as gym
gym.make('CartPole-v1')
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 50,
}
def __init__(self, render_mode: Optional[str] = None):
self.gravity = 9.8
self.masscart = 1.0
self.masspole = 0.1
self.total_mass = self.masspole + self.masscart
self.length = 0.5 # actually half the pole's length
self.polemass_length = self.masspole * self.length
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
self.kinematics_integrator = "euler"
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4
# Angle limit set to 2 * theta_threshold_radians so failing observation
# is still within bounds.
high = np.array(
[
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max,
],
dtype=np.float32,
)
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
self.render_mode = render_mode
self.screen_width = 600
self.screen_height = 400
self.screen = None
self.clock = None
self.isopen = True
self.state = None
self.steps_beyond_terminated = None
def step(self, action):
assert self.action_space.contains(
action
), f"{action!r} ({type(action)}) invalid"
assert self.state is not None, "Call reset before using step method."
x, x_dot, theta, theta_dot = self.state
force = self.force_mag if action == 1 else -self.force_mag
costheta = math.cos(theta)
sintheta = math.sin(theta)
# For the interested reader:
# https://coneural.org/florian/papers/05_cart_pole.pdf
temp = (
force + self.polemass_length * theta_dot**2 * sintheta
) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta * temp) / (
self.length * (4.0 / 3.0 - self.masspole * costheta**2 / self.total_mass)
)
xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
if self.kinematics_integrator == "euler":
x = x + self.tau * x_dot
x_dot = x_dot + self.tau * xacc
theta = theta + self.tau * theta_dot
theta_dot = theta_dot + self.tau * thetaacc
else: # semi-implicit euler
x_dot = x_dot + self.tau * xacc
x = x + self.tau * x_dot
theta_dot = theta_dot + self.tau * thetaacc
theta = theta + self.tau * theta_dot
self.state = (x, x_dot, theta, theta_dot)
terminated = bool(
x < -self.x_threshold
or x > self.x_threshold
or theta < -self.theta_threshold_radians
or theta > self.theta_threshold_radians
)
if not terminated:
reward = 1.0
elif self.steps_beyond_terminated is None:
# Pole just fell!
self.steps_beyond_terminated = 0
reward = 1.0
else:
if self.steps_beyond_terminated == 0:
logger.warn(
"You are calling 'step()' even though this "
"environment has already returned terminated = True. You "
"should always call 'reset()' once you receive 'terminated = "
"True' -- any further steps are undefined behavior."
)
self.steps_beyond_terminated += 1
reward = 0.0
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
low, high = utils.maybe_parse_reset_bounds(
options, -0.05, 0.05 # default low
) # default high
self.state = self.np_random.uniform(low=low, high=high, size=(4,))
self.steps_beyond_terminated = None
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.screen_width, self.screen_height)
)
else: # mode == "rgb_array"
self.screen = pygame.Surface((self.screen_width, self.screen_height))
if self.clock is None:
self.clock = pygame.time.Clock()
world_width = self.x_threshold * 2
scale = self.screen_width / world_width
polewidth = 10.0
polelen = scale * (2 * self.length)
cartwidth = 50.0
cartheight = 30.0
if self.state is None:
return None
x = self.state
self.surf = pygame.Surface((self.screen_width, self.screen_height))
self.surf.fill((255, 255, 255))
l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
axleoffset = cartheight / 4.0
cartx = x[0] * scale + self.screen_width / 2.0 # MIDDLE OF CART
carty = 100 # TOP OF CART
cart_coords = [(l, b), (l, t), (r, t), (r, b)]
cart_coords = [(c[0] + cartx, c[1] + carty) for c in cart_coords]
gfxdraw.aapolygon(self.surf, cart_coords, (0, 0, 0))
gfxdraw.filled_polygon(self.surf, cart_coords, (0, 0, 0))
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
)
pole_coords = []
for coord in [(l, b), (l, t), (r, t), (r, b)]:
coord = pygame.math.Vector2(coord).rotate_rad(-x[2])
coord = (coord[0] + cartx, coord[1] + carty + axleoffset)
pole_coords.append(coord)
gfxdraw.aapolygon(self.surf, pole_coords, (202, 152, 101))
gfxdraw.filled_polygon(self.surf, pole_coords, (202, 152, 101))
gfxdraw.aacircle(
self.surf,
int(cartx),
int(carty + axleoffset),
int(polewidth / 2),
(129, 132, 203),
)
gfxdraw.filled_circle(
self.surf,
int(cartx),
int(carty + axleoffset),
int(polewidth / 2),
(129, 132, 203),
)
gfxdraw.hline(self.surf, 0, self.screen_width, carty, (0, 0, 0))
self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False
class CartPoleVectorEnv(VectorEnv):
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 50,
}
def __init__(
self,
num_envs: int = 2,
max_episode_steps: int = 500,
render_mode: Optional[str] = None,
):
super().__init__()
self.num_envs = num_envs
self.gravity = 9.8
self.masscart = 1.0
self.masspole = 0.1
self.total_mass = self.masspole + self.masscart
self.length = 0.5 # actually half the pole's length
self.polemass_length = self.masspole * self.length
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
self.kinematics_integrator = "euler"
self.max_episode_steps = max_episode_steps
self.steps = np.zeros(num_envs, dtype=np.int32)
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4
# Angle limit set to 2 * theta_threshold_radians so failing observation
# is still within bounds.
high = np.array(
[
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max,
],
dtype=np.float32,
)
self.low = -0.05
self.high = 0.05
self.single_action_space = spaces.Discrete(2)
self.action_space = batch_space(self.single_action_space, num_envs)
self.single_observation_space = spaces.Box(-high, high, dtype=np.float32)
self.observation_space = batch_space(self.single_observation_space, num_envs)
self.render_mode = render_mode
self.screen_width = 600
self.screen_height = 400
self.screens = None
self.clocks = None
self.isopen = True
self.state = None
self.steps_beyond_terminated = None
def step(
self, action: np.ndarray
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]:
assert self.action_space.contains(
action
), f"{action!r} ({type(action)}) invalid"
assert self.state is not None, "Call reset before using step method."
x, x_dot, theta, theta_dot = self.state
force = np.sign(action - 0.5) * self.force_mag
costheta = np.cos(theta)
sintheta = np.sin(theta)
# For the interested reader:
# https://coneural.org/florian/papers/05_cart_pole.pdf
temp = (
force + self.polemass_length * theta_dot**2 * sintheta
) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta * temp) / (
self.length * (4.0 / 3.0 - self.masspole * costheta**2 / self.total_mass)
)
xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
if self.kinematics_integrator == "euler":
x = x + self.tau * x_dot
x_dot = x_dot + self.tau * xacc
theta = theta + self.tau * theta_dot
theta_dot = theta_dot + self.tau * thetaacc
else: # semi-implicit euler
x_dot = x_dot + self.tau * xacc
x = x + self.tau * x_dot
theta_dot = theta_dot + self.tau * thetaacc
theta = theta + self.tau * theta_dot
self.state = np.stack((x, x_dot, theta, theta_dot))
terminated: np.ndarray = (
(x < -self.x_threshold)
| (x > self.x_threshold)
| (theta < -self.theta_threshold_radians)
| (theta > self.theta_threshold_radians)
)
self.steps += 1
truncated = self.steps >= self.max_episode_steps
done = terminated | truncated
if any(done):
# This code was generated by copilot, need to check if it works
self.state[:, done] = self.np_random.uniform(
low=self.low, high=self.high, size=(4, done.sum())
).astype(np.float32)
self.steps[done] = 0
reward = np.ones_like(terminated, dtype=np.float32)
if self.render_mode == "human":
self.render()
return self.state.T, reward, terminated, truncated, {}
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
self.low, self.high = utils.maybe_parse_reset_bounds(
options, -0.05, 0.05 # default low
) # default high
self.state = self.np_random.uniform(
low=self.low, high=self.high, size=(4, self.num_envs)
).astype(np.float32)
self.steps_beyond_terminated = None
if self.render_mode == "human":
self.render()
return self.state.T, {}
def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic_control]`"
)
if self.screens is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screens = [
pygame.display.set_mode((self.screen_width, self.screen_height))
for _ in range(self.num_envs)
]
else: # mode == "rgb_array"
self.screens = [
pygame.Surface((self.screen_width, self.screen_height))
for _ in range(self.num_envs)
]
if self.clocks is None:
self.clock = [pygame.time.Clock() for _ in range(self.num_envs)]
world_width = self.x_threshold * 2
scale = self.screen_width / world_width
polewidth = 10.0
polelen = scale * (2 * self.length)
cartwidth = 50.0
cartheight = 30.0
if self.state is None:
return None
for state, screen, clock in zip(self.state, self.screens, self.clocks):
x = self.state.T
self.surf = pygame.Surface((self.screen_width, self.screen_height))
self.surf.fill((255, 255, 255))
l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
axleoffset = cartheight / 4.0
cartx = x[0] * scale + self.screen_width / 2.0 # MIDDLE OF CART
carty = 100 # TOP OF CART
cart_coords = [(l, b), (l, t), (r, t), (r, b)]
cart_coords = [(c[0] + cartx, c[1] + carty) for c in cart_coords]
gfxdraw.aapolygon(self.surf, cart_coords, (0, 0, 0))
gfxdraw.filled_polygon(self.surf, cart_coords, (0, 0, 0))
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
)
pole_coords = []
for coord in [(l, b), (l, t), (r, t), (r, b)]:
coord = pygame.math.Vector2(coord).rotate_rad(-x[2])
coord = (coord[0] + cartx, coord[1] + carty + axleoffset)
pole_coords.append(coord)
gfxdraw.aapolygon(self.surf, pole_coords, (202, 152, 101))
gfxdraw.filled_polygon(self.surf, pole_coords, (202, 152, 101))
gfxdraw.aacircle(
self.surf,
int(cartx),
int(carty + axleoffset),
int(polewidth / 2),
(129, 132, 203),
)
gfxdraw.filled_circle(
self.surf,
int(cartx),
int(carty + axleoffset),
int(polewidth / 2),
(129, 132, 203),
)
gfxdraw.hline(self.surf, 0, self.screen_width, carty, (0, 0, 0))
self.surf = pygame.transform.flip(self.surf, False, True)
screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
[clock.tick(self.metadata["render_fps"]) for clock in self.clocks]
pygame.display.flip()
elif self.render_mode == "rgb_array":
return [
np.transpose(
np.array(pygame.surfarray.pixels3d(screen)), axes=(1, 0, 2)
)
for screen in self.screens
]
def close(self):
if self.screens is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False

View File

@ -0,0 +1,304 @@
"""
@author: Olivier Sigaud
A merge between two sources:
* Adaptation of the MountainCar Environment from the "FAReinforcement" library
of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia
* the gymnasium MountainCar environment
itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
from typing import Optional
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
class Continuous_MountainCarEnv(gym.Env):
"""
## Description
The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
that can be applied to the car in either direction. The goal of the MDP is to strategically
accelerate the car to reach the goal state on top of the right hill. There are two versions
of the mountain car domain in gymnasium: one with discrete actions and one with continuous.
This version is the one with continuous actions.
This MDP first appeared in [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)
```
@TECHREPORT{Moore90efficientmemory-based,
author = {Andrew William Moore},
title = {Efficient Memory-based Learning for Robot Control},
institution = {University of Cambridge},
year = {1990}
}
```
## Observation Space
The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
| Num | Observation | Min | Max | Unit |
|-----|--------------------------------------|------|-----|--------------|
| 0 | position of the car along the x-axis | -Inf | Inf | position (m) |
| 1 | velocity of the car | -Inf | Inf | position (m) |
## Action Space
The action is a `ndarray` with shape `(1,)`, representing the directional force applied on the car.
The action is clipped in the range `[-1,1]` and multiplied by a power of 0.0015.
## Transition Dynamics:
Given an action, the mountain car follows the following transition dynamics:
*velocity<sub>t+1</sub> = velocity<sub>t+1</sub> + force * self.power - 0.0025 * cos(3 * position<sub>t</sub>)*
*position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
where force is the action clipped to the range `[-1,1]` and power is a constant 0.0015.
The collisions at either end are inelastic with the velocity set to 0 upon collision with the wall.
The position is clipped to the range [-1.2, 0.6] and velocity is clipped to the range [-0.07, 0.07].
## Reward
A negative reward of *-0.1 * action<sup>2</sup>* is received at each timestep to penalise for
taking actions of large magnitude. If the mountain car reaches the goal then a positive reward of +100
is added to the negative reward for that timestep.
## Starting State
The position of the car is assigned a uniform random value in `[-0.6 , -0.4]`.
The starting velocity of the car is always assigned to 0.
## Episode End
The episode ends if either of the following happens:
1. Termination: The position of the car is greater than or equal to 0.45 (the goal position on top of the right hill)
2. Truncation: The length of the episode is 999.
## Arguments
```python
import gymnasium as gym
gym.make('MountainCarContinuous-v0')
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
## Version History
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 30,
}
def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
self.min_action = -1.0
self.max_action = 1.0
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = (
0.45 # was 0.5 in gymnasium, 0.45 in Arnaud de Broissia's version
)
self.goal_velocity = goal_velocity
self.power = 0.0015
self.low_state = np.array(
[self.min_position, -self.max_speed], dtype=np.float32
)
self.high_state = np.array(
[self.max_position, self.max_speed], dtype=np.float32
)
self.render_mode = render_mode
self.screen_width = 600
self.screen_height = 400
self.screen = None
self.clock = None
self.isopen = True
self.action_space = spaces.Box(
low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
)
self.observation_space = spaces.Box(
low=self.low_state, high=self.high_state, dtype=np.float32
)
def step(self, action: np.ndarray):
position = self.state[0]
velocity = self.state[1]
force = min(max(action[0], self.min_action), self.max_action)
velocity += force * self.power - 0.0025 * math.cos(3 * position)
if velocity > self.max_speed:
velocity = self.max_speed
if velocity < -self.max_speed:
velocity = -self.max_speed
position += velocity
if position > self.max_position:
position = self.max_position
if position < self.min_position:
position = self.min_position
if position == self.min_position and velocity < 0:
velocity = 0
# Convert a possible numpy bool to a Python bool.
terminated = bool(
position >= self.goal_position and velocity >= self.goal_velocity
)
reward = 0
if terminated:
reward = 100.0
reward -= math.pow(action[0], 2) * 0.1
self.state = np.array([position, velocity], dtype=np.float32)
if self.render_mode == "human":
self.render()
return self.state, reward, terminated, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
self.state = np.array([self.np_random.uniform(low=low, high=high), 0])
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}
def _height(self, xs):
return np.sin(3 * xs) * 0.45 + 0.55
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.screen_width, self.screen_height)
)
else: # mode == "rgb_array":
self.screen = pygame.Surface((self.screen_width, self.screen_height))
if self.clock is None:
self.clock = pygame.time.Clock()
world_width = self.max_position - self.min_position
scale = self.screen_width / world_width
carwidth = 40
carheight = 20
self.surf = pygame.Surface((self.screen_width, self.screen_height))
self.surf.fill((255, 255, 255))
pos = self.state[0]
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs - self.min_position) * scale, ys * scale))
pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0))
clearance = 10
l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
coords = []
for c in [(l, b), (l, t), (r, t), (r, b)]:
c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
coords.append(
(
c[0] + (pos - self.min_position) * scale,
c[1] + clearance + self._height(pos) * scale,
)
)
gfxdraw.aapolygon(self.surf, coords, (0, 0, 0))
gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0))
for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]:
c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
wheel = (
int(c[0] + (pos - self.min_position) * scale),
int(c[1] + clearance + self._height(pos) * scale),
)
gfxdraw.aacircle(
self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
)
gfxdraw.filled_circle(
self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
)
flagx = int((self.goal_position - self.min_position) * scale)
flagy1 = int(self._height(self.goal_position) * scale)
flagy2 = flagy1 + 50
gfxdraw.vline(self.surf, flagx, flagy1, flagy2, (0, 0, 0))
gfxdraw.aapolygon(
self.surf,
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
(204, 204, 0),
)
gfxdraw.filled_polygon(
self.surf,
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
(204, 204, 0),
)
self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False

View File

@ -0,0 +1,284 @@
"""
http://incompleteideas.net/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
from typing import Optional
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
class MountainCarEnv(gym.Env):
"""
## Description
The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
that can be applied to the car in either direction. The goal of the MDP is to strategically
accelerate the car to reach the goal state on top of the right hill. There are two versions
of the mountain car domain in gymnasium: one with discrete actions and one with continuous.
This version is the one with discrete actions.
This MDP first appeared in [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)
```
@TECHREPORT{Moore90efficientmemory-based,
author = {Andrew William Moore},
title = {Efficient Memory-based Learning for Robot Control},
institution = {University of Cambridge},
year = {1990}
}
```
## Observation Space
The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
| Num | Observation | Min | Max | Unit |
|-----|--------------------------------------|-------|------|--------------|
| 0 | position of the car along the x-axis | -1.2 | 0.6 | position (m) |
| 1 | velocity of the car | -0.07 | 0.07 | velocity (v) |
## Action Space
There are 3 discrete deterministic actions:
- 0: Accelerate to the left
- 1: Don't accelerate
- 2: Accelerate to the right
## Transition Dynamics:
Given an action, the mountain car follows the following transition dynamics:
*velocity<sub>t+1</sub> = velocity<sub>t</sub> + (action - 1) * force - cos(3 * position<sub>t</sub>) * gravity*
*position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
where force = 0.001 and gravity = 0.0025. The collisions at either end are inelastic with the velocity set to 0
upon collision with the wall. The position is clipped to the range `[-1.2, 0.6]` and
velocity is clipped to the range `[-0.07, 0.07]`.
## Reward:
The goal is to reach the flag placed on top of the right hill as quickly as possible, as such the agent is
penalised with a reward of -1 for each timestep.
## Starting State
The position of the car is assigned a uniform random value in *[-0.6 , -0.4]*.
The starting velocity of the car is always assigned to 0.
## Episode End
The episode ends if either of the following happens:
1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on top of the right hill)
2. Truncation: The length of the episode is 200.
## Arguments
```python
import gymnasium as gym
gym.make('MountainCar-v0')
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
## Version History
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 30,
}
def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.5
self.goal_velocity = goal_velocity
self.force = 0.001
self.gravity = 0.0025
self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32)
self.high = np.array([self.max_position, self.max_speed], dtype=np.float32)
self.render_mode = render_mode
self.screen_width = 600
self.screen_height = 400
self.screen = None
self.clock = None
self.isopen = True
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
def step(self, action: int):
assert self.action_space.contains(
action
), f"{action!r} ({type(action)}) invalid"
position, velocity = self.state
velocity += (action - 1) * self.force + math.cos(3 * position) * (-self.gravity)
velocity = np.clip(velocity, -self.max_speed, self.max_speed)
position += velocity
position = np.clip(position, self.min_position, self.max_position)
if position == self.min_position and velocity < 0:
velocity = 0
terminated = bool(
position >= self.goal_position and velocity >= self.goal_velocity
)
reward = -1.0
self.state = (position, velocity)
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
self.state = np.array([self.np_random.uniform(low=low, high=high), 0])
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}
def _height(self, xs):
return np.sin(3 * xs) * 0.45 + 0.55
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.screen_width, self.screen_height)
)
else: # mode in "rgb_array"
self.screen = pygame.Surface((self.screen_width, self.screen_height))
if self.clock is None:
self.clock = pygame.time.Clock()
world_width = self.max_position - self.min_position
scale = self.screen_width / world_width
carwidth = 40
carheight = 20
self.surf = pygame.Surface((self.screen_width, self.screen_height))
self.surf.fill((255, 255, 255))
pos = self.state[0]
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs - self.min_position) * scale, ys * scale))
pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0))
clearance = 10
l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
coords = []
for c in [(l, b), (l, t), (r, t), (r, b)]:
c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
coords.append(
(
c[0] + (pos - self.min_position) * scale,
c[1] + clearance + self._height(pos) * scale,
)
)
gfxdraw.aapolygon(self.surf, coords, (0, 0, 0))
gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0))
for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]:
c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
wheel = (
int(c[0] + (pos - self.min_position) * scale),
int(c[1] + clearance + self._height(pos) * scale),
)
gfxdraw.aacircle(
self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
)
gfxdraw.filled_circle(
self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
)
flagx = int((self.goal_position - self.min_position) * scale)
flagy1 = int(self._height(self.goal_position) * scale)
flagy2 = flagy1 + 50
gfxdraw.vline(self.surf, flagx, flagy1, flagy2, (0, 0, 0))
gfxdraw.aapolygon(
self.surf,
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
(204, 204, 0),
)
gfxdraw.filled_polygon(
self.surf,
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
(204, 204, 0),
)
self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
elif self.render_mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def get_keys_to_action(self):
# Control with left and right arrow keys.
return {(): 1, (276,): 0, (275,): 2, (275, 276): 1}
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False

View File

@ -0,0 +1,277 @@
__credits__ = ["Carlos Luis"]
from os import path
from typing import Optional
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
DEFAULT_X = np.pi
DEFAULT_Y = 1.0
class PendulumEnv(gym.Env):
"""
## Description
The inverted pendulum swingup problem is based on the classic problem in control theory.
The system consists of a pendulum attached at one end to a fixed point, and the other end being free.
The pendulum starts in a random position and the goal is to apply torque on the free end to swing it
into an upright position, with its center of gravity right above the fixed point.
The diagram below specifies the coordinate system used for the implementation of the pendulum's
dynamic equations.
![Pendulum Coordinate System](/_static/diagrams/pendulum.png)
- `x-y`: cartesian coordinates of the pendulum's end in meters.
- `theta` : angle in radians.
- `tau`: torque in `N m`. Defined as positive _counter-clockwise_.
## Action Space
The action is a `ndarray` with shape `(1,)` representing the torque applied to free end of the pendulum.
| Num | Action | Min | Max |
|-----|--------|------|-----|
| 0 | Torque | -2.0 | 2.0 |
## Observation Space
The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free
end and its angular velocity.
| Num | Observation | Min | Max |
|-----|------------------|------|-----|
| 0 | x = cos(theta) | -1.0 | 1.0 |
| 1 | y = sin(theta) | -1.0 | 1.0 |
| 2 | Angular Velocity | -8.0 | 8.0 |
## Rewards
The reward function is defined as:
*r = -(theta<sup>2</sup> + 0.1 * theta_dt<sup>2</sup> + 0.001 * torque<sup>2</sup>)*
where `theta` is the pendulum's angle normalized between *[-pi, pi]* (with 0 being in the upright position).
Based on the above equation, the minimum reward that can be obtained is
*-(pi<sup>2</sup> + 0.1 * 8<sup>2</sup> + 0.001 * 2<sup>2</sup>) = -16.2736044*,
while the maximum reward is zero (pendulum is upright with zero velocity and no torque applied).
## Starting State
The starting state is a random angle in *[-pi, pi]* and a random angular velocity in *[-1,1]*.
## Episode Truncation
The episode truncates at 200 time steps.
## Arguments
- `g`: acceleration of gravity measured in *(m s<sup>-2</sup>)* used to calculate the pendulum dynamics.
The default value is g = 10.0 .
```python
import gymnasium as gym
gym.make('Pendulum-v1', g=9.81)
```
On reset, the `options` parameter allows the user to change the bounds used to determine
the new random state.
## Version History
* v1: Simplify the math equations, no difference in behavior.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": 30,
}
def __init__(self, render_mode: Optional[str] = None, g=10.0):
self.max_speed = 8
self.max_torque = 2.0
self.dt = 0.05
self.g = g
self.m = 1.0
self.l = 1.0
self.render_mode = render_mode
self.screen_dim = 500
self.screen = None
self.clock = None
self.isopen = True
high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
# This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
# or normalised as max_torque == 2 by default. Ignoring the issue here as the default settings are too old
# to update to follow the gymnasium api
self.action_space = spaces.Box(
low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
)
self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
def step(self, u):
th, thdot = self.state # th := theta
g = self.g
m = self.m
l = self.l
dt = self.dt
u = np.clip(u, -self.max_torque, self.max_torque)[0]
self.last_u = u # for rendering
costs = angle_normalize(th) ** 2 + 0.1 * thdot**2 + 0.001 * (u**2)
newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l**2) * u) * dt
newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
newth = th + newthdot * dt
self.state = np.array([newth, newthdot])
if self.render_mode == "human":
self.render()
return self._get_obs(), -costs, False, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed)
if options is None:
high = np.array([DEFAULT_X, DEFAULT_Y])
else:
# Note that if you use custom reset bounds, it may lead to out-of-bound
# state/observations.
x = options.get("x_init") if "x_init" in options else DEFAULT_X
y = options.get("y_init") if "y_init" in options else DEFAULT_Y
x = utils.verify_number_and_cast(x)
y = utils.verify_number_and_cast(y)
high = np.array([x, y])
low = -high # We enforce symmetric limits.
self.state = self.np_random.uniform(low=low, high=high)
self.last_u = None
if self.render_mode == "human":
self.render()
return self._get_obs(), {}
def _get_obs(self):
theta, thetadot = self.state
return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
try:
import pygame
from pygame import gfxdraw
except ImportError as e:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gymnasium[classic-control]`"
) from e
if self.screen is None:
pygame.init()
if self.render_mode == "human":
pygame.display.init()
self.screen = pygame.display.set_mode(
(self.screen_dim, self.screen_dim)
)
else: # mode in "rgb_array"
self.screen = pygame.Surface((self.screen_dim, self.screen_dim))
if self.clock is None:
self.clock = pygame.time.Clock()
self.surf = pygame.Surface((self.screen_dim, self.screen_dim))
self.surf.fill((255, 255, 255))
bound = 2.2
scale = self.screen_dim / (bound * 2)
offset = self.screen_dim // 2
rod_length = 1 * scale
rod_width = 0.2 * scale
l, r, t, b = 0, rod_length, rod_width / 2, -rod_width / 2
coords = [(l, b), (l, t), (r, t), (r, b)]
transformed_coords = []
for c in coords:
c = pygame.math.Vector2(c).rotate_rad(self.state[0] + np.pi / 2)
c = (c[0] + offset, c[1] + offset)
transformed_coords.append(c)
gfxdraw.aapolygon(self.surf, transformed_coords, (204, 77, 77))
gfxdraw.filled_polygon(self.surf, transformed_coords, (204, 77, 77))
gfxdraw.aacircle(self.surf, offset, offset, int(rod_width / 2), (204, 77, 77))
gfxdraw.filled_circle(
self.surf, offset, offset, int(rod_width / 2), (204, 77, 77)
)
rod_end = (rod_length, 0)
rod_end = pygame.math.Vector2(rod_end).rotate_rad(self.state[0] + np.pi / 2)
rod_end = (int(rod_end[0] + offset), int(rod_end[1] + offset))
gfxdraw.aacircle(
self.surf, rod_end[0], rod_end[1], int(rod_width / 2), (204, 77, 77)
)
gfxdraw.filled_circle(
self.surf, rod_end[0], rod_end[1], int(rod_width / 2), (204, 77, 77)
)
fname = path.join(path.dirname(__file__), "assets/clockwise.png")
img = pygame.image.load(fname)
if self.last_u is not None:
scale_img = pygame.transform.smoothscale(
img,
(scale * np.abs(self.last_u) / 2, scale * np.abs(self.last_u) / 2),
)
is_flip = bool(self.last_u > 0)
scale_img = pygame.transform.flip(scale_img, is_flip, True)
self.surf.blit(
scale_img,
(
offset - scale_img.get_rect().centerx,
offset - scale_img.get_rect().centery,
),
)
# drawing axle
gfxdraw.aacircle(self.surf, offset, offset, int(0.05 * scale), (0, 0, 0))
gfxdraw.filled_circle(self.surf, offset, offset, int(0.05 * scale), (0, 0, 0))
self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (0, 0))
if self.render_mode == "human":
pygame.event.pump()
self.clock.tick(self.metadata["render_fps"])
pygame.display.flip()
else: # mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
def close(self):
if self.screen is not None:
import pygame
pygame.display.quit()
pygame.quit()
self.isopen = False
def angle_normalize(x):
return ((x + np.pi) % (2 * np.pi)) - np.pi

View File

@ -0,0 +1,46 @@
"""
Utility functions used for classic control environments.
"""
from typing import Optional, SupportsFloat, Tuple
def verify_number_and_cast(x: SupportsFloat) -> float:
"""Verify parameter is a single number and cast to a float."""
try:
x = float(x)
except (ValueError, TypeError) as e:
raise ValueError(f"An option ({x}) could not be converted to a float.") from e
return x
def maybe_parse_reset_bounds(
options: Optional[dict], default_low: float, default_high: float
) -> Tuple[float, float]:
"""
This function can be called during a reset() to customize the sampling
ranges for setting the initial state distributions.
Args:
options: Options passed in to reset().
default_low: Default lower limit to use, if none specified in options.
default_high: Default upper limit to use, if none specified in options.
Returns:
Tuple of the lower and upper limits.
"""
if options is None:
return default_low, default_high
low = options.get("low") if "low" in options else default_low
high = options.get("high") if "high" in options else default_high
# We expect only numerical inputs.
low = verify_number_and_cast(low)
high = verify_number_and_cast(high)
if low > high:
raise ValueError(
f"Lower bound ({low}) must be lower than higher bound ({high})."
)
return low, high

View File

@ -0,0 +1,15 @@
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv # isort:skip
# ^^^^^ so that user gets the correct error
# message if mujoco is not installed correctly
from gymnasium.envs.mujoco.ant import AntEnv
from gymnasium.envs.mujoco.half_cheetah import HalfCheetahEnv
from gymnasium.envs.mujoco.hopper import HopperEnv
from gymnasium.envs.mujoco.humanoid import HumanoidEnv
from gymnasium.envs.mujoco.humanoidstandup import HumanoidStandupEnv
from gymnasium.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
from gymnasium.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
from gymnasium.envs.mujoco.pusher import PusherEnv
from gymnasium.envs.mujoco.reacher import ReacherEnv
from gymnasium.envs.mujoco.swimmer import SwimmerEnv
from gymnasium.envs.mujoco.walker2d import Walker2dEnv

View File

@ -0,0 +1,80 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class AntEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, "ant.xml", 5, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore) / self.dt
ctrl_cost = 0.5 * np.square(a).sum()
contact_cost = (
0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
)
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
not_terminated = (
np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
)
terminated = not not_terminated
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return (
ob,
reward,
terminated,
False,
dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward,
),
)
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.1, high=0.1
)
qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.distance = self.model.stat.extent * 0.5

View File

@ -0,0 +1,187 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class AntEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
xml_file="ant.xml",
ctrl_cost_weight=0.5,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
ctrl_cost_weight,
contact_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
contact_force_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._contact_force_range = contact_force_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(113,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def contact_forces(self):
raw_contact_forces = self.sim.data.cfrc_ext
min_value, max_value = self._contact_force_range
contact_forces = np.clip(raw_contact_forces, min_value, max_value)
return contact_forces
@property
def contact_cost(self):
contact_cost = self._contact_cost_weight * np.sum(
np.square(self.contact_forces)
)
return contact_cost
@property
def is_healthy(self):
state = self.state_vector()
min_z, max_z = self._healthy_z_range
is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def step(self, action):
xy_position_before = self.get_body_com("torso")[:2].copy()
self.do_simulation(action, self.frame_skip)
xy_position_after = self.get_body_com("torso")[:2].copy()
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
ctrl_cost = self.control_cost(action)
contact_cost = self.contact_cost
forward_reward = x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost + contact_cost
reward = rewards - costs
terminated = self.terminated
observation = self._get_obs()
info = {
"reward_forward": forward_reward,
"reward_ctrl": -ctrl_cost,
"reward_contact": -contact_cost,
"reward_survive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
contact_force = self.contact_forces.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
observations = np.concatenate((position, velocity, contact_force))
return observations
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,379 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class AntEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the environment introduced by Schulman,
Moritz, Levine, Jordan and Abbeel in ["High-Dimensional Continuous Control
Using Generalized Advantage Estimation"](https://arxiv.org/abs/1506.02438).
The ant is a 3D robot consisting of one torso (free rotational body) with
four legs attached to it with each leg having two body parts. The goal is to
coordinate the four legs to move in the forward (right) direction by applying
torques on the eight hinges connecting the two body parts of each leg and the torso
(nine body parts and eight hinges).
## Action Space
The action space is a `Box(-1, 1, (8,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ----------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
| 0 | Torque applied on the rotor between the torso and back right hip | -1 | 1 | hip_4 (right_back_leg) | hinge | torque (N m) |
| 1 | Torque applied on the rotor between the back right two links | -1 | 1 | angle_4 (right_back_leg) | hinge | torque (N m) |
| 2 | Torque applied on the rotor between the torso and front left hip | -1 | 1 | hip_1 (front_left_leg) | hinge | torque (N m) |
| 3 | Torque applied on the rotor between the front left two links | -1 | 1 | angle_1 (front_left_leg) | hinge | torque (N m) |
| 4 | Torque applied on the rotor between the torso and front right hip | -1 | 1 | hip_2 (front_right_leg) | hinge | torque (N m) |
| 5 | Torque applied on the rotor between the front right two links | -1 | 1 | angle_2 (front_right_leg) | hinge | torque (N m) |
| 6 | Torque applied on the rotor between the torso and back left hip | -1 | 1 | hip_3 (back_leg) | hinge | torque (N m) |
| 7 | Torque applied on the rotor between the back left two links | -1 | 1 | angle_3 (back_leg) | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the ant,
followed by the velocities of those individual parts (their derivatives) with all
the positions ordered before all the velocities.
By default, observations do not include the x- and y-coordinates of the ant's torso. These may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be a `Box(-Inf, Inf, (29,), float64)` where the first two observations
represent the x- and y- coordinates of the ant's torso.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
of the torso will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
However, by default, observation Space is a `Box(-Inf, Inf, (27,), float64)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
|-----|--------------------------------------------------------------|--------|--------|----------------------------------------|-------|--------------------------|
| 0 | z-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) |
| 1 | x-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
| 2 | y-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
| 3 | z-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
| 4 | w-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
| 5 | angle between torso and first link on front left | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) |
| 6 | angle between the two links on the front left | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) |
| 7 | angle between torso and first link on front right | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) |
| 8 | angle between the two links on the front right | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) |
| 9 | angle between torso and first link on back left | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) |
| 10 | angle between the two links on the back left | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) |
| 11 | angle between torso and first link on back right | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) |
| 12 | angle between the two links on the back right | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) |
| 13 | x-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) |
| 14 | y-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) |
| 15 | z-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) |
| 16 | x-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) |
| 17 | y-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) |
| 18 | z-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) |
| 19 | angular velocity of angle between torso and front left link | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) |
| 20 | angular velocity of the angle between front left links | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) |
| 21 | angular velocity of angle between torso and front right link | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) |
| 22 | angular velocity of the angle between front right links | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) |
| 23 | angular velocity of angle between torso and back left link | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) |
| 24 | angular velocity of the angle between back left links | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) |
| 25 | angular velocity of angle between torso and back right link | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) |
| 26 | angular velocity of the angle between back right links | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) |
| excluded | x-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) |
| excluded | y-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) |
If version < `v4` or `use_contact_forces` is `True` then the observation space is extended by 14*6 = 84 elements, which are contact forces
(external forces - force x, y, z and torque x, y, z) applied to the
center of mass of each of the body parts. The 14 body parts are:
| id (for `v2`, `v3`, `v4)` | body parts |
| --- | ------------ |
| 0 | worldbody (note: forces are always full of zeros) |
| 1 | torso |
| 2 | front_left_leg |
| 3 | aux_1 (front left leg) |
| 4 | ankle_1 (front left leg) |
| 5 | front_right_leg |
| 6 | aux_2 (front right leg) |
| 7 | ankle_2 (front right leg) |
| 8 | back_leg (back left leg) |
| 9 | aux_3 (back left leg) |
| 10 | ankle_3 (back left leg) |
| 11 | right_back_leg |
| 12 | aux_4 (back right leg) |
| 13 | ankle_4 (back right leg) |
The (x,y,z) coordinates are translational DOFs while the orientations are rotational
DOFs expressed as quaternions. One can read more about free joints on the [Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
**Note:** Ant-v4 environment no longer has the following contact forces issue.
If using previous Humanoid versions from v4, there have been reported issues that using a Mujoco-Py version > 2.0 results
in the contact forces always being 0. As such we recommend to use a Mujoco-Py version < 2.0
when using the Ant environment if you would like to report results with contact forces (if
contact forces are not used in your experiments, you can use version > 2.0).
## Rewards
The reward consists of three parts:
- *healthy_reward*: Every timestep that the ant is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward`
- *forward_reward*: A reward of moving forward which is measured as
*(x-coordinate before action - x-coordinate after action)/dt*. *dt* is the time
between actions and is dependent on the `frame_skip` parameter (default is 5),
where the frametime is 0.01 - making the default *dt = 5 * 0.01 = 0.05*.
This reward would be positive if the ant moves forward (in positive x direction).
- *ctrl_cost*: A negative reward for penalising the ant if it takes actions
that are too large. It is measured as *`ctrl_cost_weight` * sum(action<sup>2</sup>)*
where *`ctr_cost_weight`* is a parameter set for the control and has a default value of 0.5.
- *contact_cost*: A negative reward for penalising the ant if the external contact
force is too large. It is calculated *`contact_cost_weight` * sum(clip(external contact
force to `contact_force_range`)<sup>2</sup>)*.
The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost*.
But if `use_contact_forces=True` or version < `v4`
The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost*.
In either case `info` will also contain the individual reward terms.
## Starting State
All observations start in state
(0.0, 0.0, 0.75, 1.0, 0.0 ... 0.0) with a uniform noise in the range
of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional values and standard normal noise
with mean 0 and standard deviation `reset_noise_scale` added to the velocity values for
stochasticity. Note that the initial z coordinate is intentionally selected
to be slightly high, thereby indicating a standing up ant. The initial orientation
is designed to make it face forward as well.
## Episode End
The ant is said to be unhealthy if any of the following happens:
1. Any of the state space values is no longer finite
2. The z-coordinate of the torso is **not** in the closed interval given by `healthy_z_range` (defaults to [0.2, 1.0])
If `terminate_when_unhealthy=True` is passed during construction (which is the default),
the episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 1000 timesteps
2. Termination: The ant is unhealthy
If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
env = gym.make('Ant-v2')
```
v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('Ant-v4', ctrl_cost_weight=0.1, ...)
```
| Parameter | Type | Default |Description |
|-------------------------|------------|--------------|-------------------------------|
| `xml_file` | **str** | `"ant.xml"` | Path to a MuJoCo model |
| `ctrl_cost_weight` | **float** | `0.5` | Weight for *ctrl_cost* term (see section on reward) |
| `use_contact_forces` | **bool** | `False` | If true, it extends the observation space by adding contact forces (see `Observation Space` section) and includes contact_cost to the reward function (see `Rewards` section) |
| `contact_cost_weight` | **float** | `5e-4` | Weight for *contact_cost* term (see section on reward) |
| `healthy_reward` | **float** | `1` | Constant reward given if the ant is "healthy" after timestep |
| `terminate_when_unhealthy` | **bool**| `True` | If true, issue a done signal if the z-coordinate of the torso is no longer in the `healthy_z_range` |
| `healthy_z_range` | **tuple** | `(0.2, 1)` | The ant is considered healthy if the z-coordinate of the torso is in this range |
| `contact_force_range` | **tuple** | `(-1, 1)` | Contact forces are clipped to this range in the computation of *contact_cost* |
| `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation`| **bool** | `True`| Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3, also removed contact forces from the default observation space (new variable `use_contact_forces=True` can restore them)
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
xml_file="ant.xml",
ctrl_cost_weight=0.5,
use_contact_forces=False,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
ctrl_cost_weight,
use_contact_forces,
contact_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
contact_force_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._contact_force_range = contact_force_range
self._reset_noise_scale = reset_noise_scale
self._use_contact_forces = use_contact_forces
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
obs_shape = 27
if not exclude_current_positions_from_observation:
obs_shape += 2
if use_contact_forces:
obs_shape += 84
observation_space = Box(
low=-np.inf, high=np.inf, shape=(obs_shape,), dtype=np.float64
)
MujocoEnv.__init__(
self,
xml_file,
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def contact_forces(self):
raw_contact_forces = self.data.cfrc_ext
min_value, max_value = self._contact_force_range
contact_forces = np.clip(raw_contact_forces, min_value, max_value)
return contact_forces
@property
def contact_cost(self):
contact_cost = self._contact_cost_weight * np.sum(
np.square(self.contact_forces)
)
return contact_cost
@property
def is_healthy(self):
state = self.state_vector()
min_z, max_z = self._healthy_z_range
is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def step(self, action):
xy_position_before = self.get_body_com("torso")[:2].copy()
self.do_simulation(action, self.frame_skip)
xy_position_after = self.get_body_com("torso")[:2].copy()
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
forward_reward = x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost = self.control_cost(action)
terminated = self.terminated
observation = self._get_obs()
info = {
"reward_forward": forward_reward,
"reward_ctrl": -ctrl_cost,
"reward_survive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self._use_contact_forces:
contact_cost = self.contact_cost
costs += contact_cost
info["reward_ctrl"] = -contact_cost
reward = rewards - costs
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = self.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
if self._use_contact_forces:
contact_force = self.contact_forces.flat.copy()
return np.concatenate((position, velocity, contact_force))
else:
return np.concatenate((position, velocity))
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation

View File

@ -0,0 +1,81 @@
<mujoco model="ant">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.01"/>
<custom>
<numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
</custom>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0.75">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
<joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
<body name="front_left_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
<body name="aux_1" pos="0.2 0.2 0">
<joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 0.2 0">
<joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="front_right_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
<body name="aux_2" pos="-0.2 0.2 0">
<joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 0.2 0">
<joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
<body name="aux_3" pos="-0.2 -0.2 0">
<joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 -0.2 0">
<joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="right_back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
<body name="aux_4" pos="0.2 -0.2 0">
<joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 -0.2 0">
<joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,96 @@
<!-- Cheetah Model
The state space is populated with joints in the order that they are
defined in this file. The actuators also operate on joints.
State-Space (name/joint/parameter):
- rootx slider position (m)
- rootz slider position (m)
- rooty hinge angle (rad)
- bthigh hinge angle (rad)
- bshin hinge angle (rad)
- bfoot hinge angle (rad)
- fthigh hinge angle (rad)
- fshin hinge angle (rad)
- ffoot hinge angle (rad)
- rootx slider velocity (m/s)
- rootz slider velocity (m/s)
- rooty hinge angular velocity (rad/s)
- bthigh hinge angular velocity (rad/s)
- bshin hinge angular velocity (rad/s)
- bfoot hinge angular velocity (rad/s)
- fthigh hinge angular velocity (rad/s)
- fshin hinge angular velocity (rad/s)
- ffoot hinge angular velocity (rad/s)
Actuators (name/actuator/parameter):
- bthigh hinge torque (N m)
- bshin hinge torque (N m)
- bfoot hinge torque (N m)
- fthigh hinge torque (N m)
- fshin hinge torque (N m)
- ffoot hinge torque (N m)
-->
<mujoco model="cheetah">
<compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
<default>
<joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
<geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
<motor ctrllimited="true" ctrlrange="-1 1"/>
</default>
<size nstack="300000" nuser_geom="1"/>
<option gravity="0 0 -9.81" timestep="0.01"/>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 .7">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
<geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
<geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
<!-- <site name='tip' pos='.15 0 .11'/>-->
<body name="bthigh" pos="-.5 0 0">
<joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
<geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
<body name="bshin" pos=".16 0 -.25">
<joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
<body name="bfoot" pos="-.28 0 -.14">
<joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
</body>
</body>
</body>
<body name="fthigh" pos=".5 0 0">
<joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
<body name="fshin" pos="-.14 0 -.24">
<joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
<body name="ffoot" pos=".13 0 -.18">
<joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor gear="120" joint="bthigh" name="bthigh"/>
<motor gear="90" joint="bshin" name="bshin"/>
<motor gear="60" joint="bfoot" name="bfoot"/>
<motor gear="120" joint="fthigh" name="fthigh"/>
<motor gear="60" joint="fshin" name="fshin"/>
<motor gear="30" joint="ffoot" name="ffoot"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,53 @@
<!--
Hopper model for `Hopper-v5`, based on openai/gym/Walker2d
modified by @saran_t
- To not require `coordinate="global"`
-->
<mujoco model="hopper">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<visual>
<map znear="0.02"/>
</visual>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 -0.25" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 -1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 -1.25" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
<geom friction="0.9" name="torso_geom" size="0.05 0.19999999999999996" type="capsule"/>
<body name="thigh" pos="0 0 -0.19999999999999996">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 0" range="-150 0" type="hinge"/>
<geom friction="0.9" pos="0 0 -0.22500000000000009" name="thigh_geom" size="0.05 0.22500000000000003" type="capsule"/>
<body name="leg" pos="0 0 -0.70000000000000007">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.25" range="-150 0" type="hinge"/>
<geom friction="0.9" name="leg_geom" size="0.04 0.25" type="capsule"/>
<body name="foot" pos="0.13 0 -0.35">
<joint axis="0 -1 0" name="foot_joint" pos="-0.13 0 0.1" range="-45 45" type="hinge"/>
<geom friction="2.0" pos="-0.065 0 0.1" quat="0.70710678118654757 0 -0.70710678118654746 0" name="foot_geom" size="0.06 0.195" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,121 @@
<mujoco model="humanoid">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
<!-- <flags solverstat="enable" energy="enable"/>-->
</option>
<size nkey="5" nuser_geom="1"/>
<visual>
<map fogend="5" fogstart="3"/>
</visual>
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
<body name="torso" pos="0 0 1.4">
<camera name="track" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
<geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258"/>
<geom fromto="-.01 -.06 -.12 -.01 .06 -.12" name="uwaist" size="0.06" type="capsule"/>
<body name="lwaist" pos="-.01 0 -0.260" quat="1.000 0 -0.002 0">
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
<body name="pelvis" pos="0 0 -0.165" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
<body name="right_thigh" pos="0 -0.1 -0.04">
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule"/>
<body name="right_shin" pos="0 0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="right_shin1" size="0.049" type="capsule"/>
<body name="right_foot" pos="0 0 -0.45">
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
</body>
</body>
</body>
<body name="left_thigh" pos="0 0.1 -0.04">
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 -0.01 -.34" name="left_thigh1" size="0.06" type="capsule"/>
<body name="left_shin" pos="0 -0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="left_shin1" size="0.049" type="capsule"/>
<body name="left_foot" pos="0 0 -0.45">
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
<camera pos="0 0 0"/>
</body>
</body>
<body name="left_upper_arm" pos="0 0.17 0.06">
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
</body>
</body>
</body>
</worldbody>
<tendon>
<fixed name="left_hipknee">
<joint coef="-1" joint="left_hip_y"/>
<joint coef="1" joint="left_knee"/>
</fixed>
<fixed name="right_hipknee">
<joint coef="-1" joint="right_hip_y"/>
<joint coef="1" joint="right_knee"/>
</fixed>
</tendon>
<actuator>
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
<motor gear="200" joint="right_knee" name="right_knee"/>
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
<motor gear="200" joint="left_knee" name="left_knee"/>
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
<motor gear="25" joint="right_elbow" name="right_elbow"/>
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
<motor gear="25" joint="left_elbow" name="left_elbow"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,121 @@
<mujoco model="humanoidstandup">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
<!-- <flags solverstat="enable" energy="enable"/>-->
</option>
<size nkey="5" nuser_geom="1"/>
<visual>
<map fogend="5" fogstart="3"/>
</visual>
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
<body name="torso" pos="0 0 .105">
<camera name="track" mode="trackcom" pos="0 -3 .5" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
<geom name="head" pos="-.15 0 0" size=".09" type="sphere" user="258"/>
<geom fromto=".11 -.06 0 .11 .06 0" name="uwaist" size="0.06" type="capsule"/>
<body name="lwaist" pos=".21 0 0" quat="1.000 0 -0.002 0">
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
<body name="pelvis" pos="0.165 0 0" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
<body name="right_thigh" pos="0 -0.1 0">
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0.34 0.01 0" name="right_thigh1" size="0.06" type="capsule"/>
<body name="right_shin" pos="0.403 0.01 0">
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
<geom fromto="0 0 0 0.3 0 0" name="right_shin1" size="0.049" type="capsule"/>
<body name="right_foot" pos="0.35 0 -.10">
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
</body>
</body>
</body>
<body name="left_thigh" pos="0 0.1 0">
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0.34 -0.01 0" name="left_thigh1" size="0.06" type="capsule"/>
<body name="left_shin" pos="0.403 -0.01 0">
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 0.3 0 0" name="left_shin1" size="0.049" type="capsule"/>
<body name="left_foot" pos="0.35 0 -.1">
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
<camera pos="0 0 0"/>
</body>
</body>
<body name="left_upper_arm" pos="0 0.17 0.06">
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
</body>
</body>
</body>
</worldbody>
<tendon>
<fixed name="left_hipknee">
<joint coef="-1" joint="left_hip_y"/>
<joint coef="1" joint="left_knee"/>
</fixed>
<fixed name="right_hipknee">
<joint coef="-1" joint="right_hip_y"/>
<joint coef="1" joint="right_knee"/>
</fixed>
</tendon>
<actuator>
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
<motor gear="200" joint="right_knee" name="right_knee"/>
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
<motor gear="200" joint="left_knee" name="left_knee"/>
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
<motor gear="25" joint="right_elbow" name="right_elbow"/>
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
<motor gear="25" joint="left_elbow" name="left_elbow"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,47 @@
<!-- Cartpole Model
The state space is populated with joints in the order that they are
defined in this file. The actuators also operate on joints.
State-Space (name/joint/parameter):
- cart slider position (m)
- pole hinge angle (rad)
- cart slider velocity (m/s)
- pole hinge angular velocity (rad/s)
Actuators (name/actuator/parameter):
- cart motor force x (N)
-->
<mujoco model="cartpole">
<compiler coordinate="local" inertiafromgeom="true"/>
<custom>
<numeric data="2" name="frame_skip"/>
</custom>
<default>
<joint damping="0.05"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
</default>
<option gravity="1e-5 0 -9.81" integrator="RK4" timestep="0.01"/>
<size nstack="3000"/>
<worldbody>
<geom name="floor" pos="0 0 -3.0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
<body name="cart" pos="0 0 0">
<joint axis="1 0 0" limited="true" margin="0.01" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
<body name="pole" pos="0 0 0">
<joint axis="0 1 0" name="hinge" pos="0 0 0" type="hinge"/>
<geom fromto="0 0 0 0 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
<body name="pole2" pos="0 0 0.6">
<joint axis="0 1 0" name="hinge2" pos="0 0 0" type="hinge"/>
<geom fromto="0 0 0 0 0 0.6" name="cpole2" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
<site name="tip" pos="0 0 .6" size="0.01 0.01"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1 1" gear="500" joint="slider" name="slide"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,27 @@
<mujoco model="inverted pendulum">
<compiler inertiafromgeom="true"/>
<default>
<joint armature="0" damping="1" limited="true"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
<tendon/>
<motor ctrlrange="-3 3"/>
</default>
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.02"/>
<size nstack="3000"/>
<worldbody>
<!--geom name="ground" type="plane" pos="0 0 0" /-->
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
<body name="cart" pos="0 0 0">
<joint axis="1 0 0" limited="true" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
<body name="pole" pos="0 0 0">
<joint axis="0 1 0" name="hinge" pos="0 0 0" range="-90 90" type="hinge"/>
<geom fromto="0 0 0 0.001 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.049 0.3" type="capsule"/>
<!-- <body name="pole2" pos="0.001 0 0.6"><joint name="hinge2" type="hinge" pos="0 0 0" axis="0 1 0"/><geom name="cpole2" type="capsule" fromto="0 0 0 0 0 0.6" size="0.05 0.3" rgba="0.7 0 0.7 1"/><site name="tip2" pos="0 0 .6"/></body>-->
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-3 3" gear="100" joint="slider" name="slide"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,31 @@
<mujoco>
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.02"/>
<default>
<joint armature="0" damping="0" limited="false"/>
<geom conaffinity="0" condim="3" density="100" friction="1 0.5 0.5" margin="0" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0">
<geom name="pointbody" pos="0 0 0.5" size="0.5" type="sphere"/>
<geom name="pointarrow" pos="0.6 0 0.5" size="0.5 0.1 0.1" type="box"/>
<joint axis="1 0 0" name="ballx" pos="0 0 0" type="slide"/>
<joint axis="0 1 0" name="bally" pos="0 0 0" type="slide"/>
<joint axis="0 0 1" limited="false" name="rot" pos="0 0 0" type="hinge"/>
</body>
</worldbody>
<actuator>
<!-- Those are just dummy actuators for providing ranges -->
<motor ctrllimited="true" ctrlrange="-1 1" joint="ballx"/>
<motor ctrllimited="true" ctrlrange="-0.25 0.25" joint="rot"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,91 @@
<mujoco model="arm3d">
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
<option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler" />
<default>
<joint armature='0.04' damping="1" limited="true"/>
<geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
</default>
<worldbody>
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
<geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" />
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" />
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" />
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" />
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
<body name="r_shoulder_lift_link" pos="0.1 0 0">
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
<body name="r_upper_arm_roll_link" pos="0 0 0">
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
<body name="r_upper_arm_link" pos="0 0 0">
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
<body name="r_elbow_flex_link" pos="0.4 0 0">
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
<body name="r_forearm_roll_link" pos="0 0 0">
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
<body name="r_forearm_link" pos="0 0 0">
<geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
<body name="r_wrist_flex_link" pos="0.321 0 0">
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
<body name="r_wrist_roll_link" pos="0 0 0">
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
<body name="tips_arm" pos="0 0 0">
<geom name="tip_arml" type="sphere" pos="0.1 -0.1 0." size="0.01" />
<geom name="tip_armr" type="sphere" pos="0.1 0.1 0." size="0.01" />
</body>
<geom type="capsule" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" contype="1" conaffinity="1" />
<geom type="capsule" fromto="0 -0.1 0. 0.1 -0.1 0" size="0.02" contype="1" conaffinity="1" />
<geom type="capsule" fromto="0 +0.1 0. 0.1 +0.1 0." size="0.02" contype="1" conaffinity="1" />
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<!--<body name="object" pos="0.55 -0.3 -0.275" >-->
<body name="object" pos="0.45 -0.05 -0.275" >
<geom rgba="1 1 1 0" type="sphere" size="0.05 0.05 0.05" density="0.00001" conaffinity="0"/>
<geom rgba="1 1 1 1" type="cylinder" size="0.05 0.05 0.05" density="0.00001" contype="1" conaffinity="0"/>
<joint name="obj_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="obj_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
<body name="goal" pos="0.45 -0.05 -0.3230">
<geom rgba="1 0 0 1" type="cylinder" size="0.08 0.001 0.1" density='0.00001' contype="0" conaffinity="0"/>
<joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
</worldbody>
<actuator>
<motor joint="r_shoulder_pan_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_shoulder_lift_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_upper_arm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_elbow_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_forearm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_wrist_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_wrist_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,39 @@
<mujoco model="reacher">
<compiler angle="radian" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
</default>
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.01"/>
<worldbody>
<!-- Arena -->
<geom conaffinity="0" contype="0" name="ground" pos="0 0 0" rgba="0.9 0.9 0.9 1" size="1 1 10" type="plane"/>
<geom conaffinity="0" fromto="-.3 -.3 .01 .3 -.3 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto=" .3 -.3 .01 .3 .3 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto="-.3 .3 .01 .3 .3 .01" name="sideN" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto="-.3 -.3 .01 -.3 .3 .01" name="sideW" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<!-- Arm -->
<geom conaffinity="0" contype="0" fromto="0 0 0 0 0 0.02" name="root" rgba="0.9 0.4 0.6 1" size=".011" type="cylinder"/>
<body name="body0" pos="0 0 .01">
<geom fromto="0 0 0 0.1 0 0" name="link0" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
<joint axis="0 0 1" limited="false" name="joint0" pos="0 0 0" type="hinge"/>
<body name="body1" pos="0.1 0 0">
<joint axis="0 0 1" limited="true" name="joint1" pos="0 0 0" range="-3.0 3.0" type="hinge"/>
<geom fromto="0 0 0 0.1 0 0" name="link1" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
<body name="fingertip" pos="0.11 0 0">
<geom contype="0" name="fingertip" pos="0 0 0" rgba="0.0 0.8 0.6 1" size=".01" type="sphere"/>
</body>
</body>
</body>
<!-- Target -->
<body name="target" pos=".1 -.1 .01">
<joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>
<geom conaffinity="0" contype="0" name="target" pos="0 0 0" rgba="0.9 0.2 0.2 1" size=".009" type="sphere"/>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint0"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint1"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,39 @@
<mujoco model="swimmer">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1"/>
<default>
<geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1"/>
<joint armature='0.1' />
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane"/>
<!-- ================= SWIMMER ================= /-->
<body name="torso" pos="0 0 0">
<camera name="track" mode="trackcom" pos="0 -3 3" xyaxes="1 0 0 0 1 1"/>
<geom density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule"/>
<joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide"/>
<joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide"/>
<joint axis="0 0 1" name="free_body_rot" pos="0 0 0" type="hinge"/>
<body name="mid" pos="0.5 0 0">
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
<joint axis="0 0 1" limited="true" name="motor1_rot" pos="0 0 0" range="-100 100" type="hinge"/>
<body name="back" pos="-1 0 0">
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
<joint axis="0 0 1" limited="true" name="motor2_rot" pos="0 0 0" range="-100 100" type="hinge"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="motor1_rot"/>
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="motor2_rot"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,68 @@
<!--
Walker2D model for `Walker2d-v5`, based on openai/gym/Walker2d
modified by @kallinteris-Andreas
- To not require `coordinate="global"`
- keep feet friction to 0.9/1.9
-->
<mujoco model="walker2d">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="0.01" damping=".1" limited="true"/>
<geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 -0.25" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 -1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 -1.25" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
<geom friction="0.9" name="torso_geom" size="0.050000000000000003 0.19999999999999996" type="capsule"/>
<body name="thigh" pos="0 0 -0.19999999999999996">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 0" range="-150 0" type="hinge"/>
<geom friction="0.9" pos="0 0 -0.22500000000000009" name="thigh_geom" size="0.050000000000000003 0.22500000000000003" type="capsule"/>
<body name="leg" pos="0 0 -0.70000000000000007">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.25" range="-150 0" type="hinge"/>
<geom friction="0.9" name="leg_geom" size="0.040000000000000001 0.25" type="capsule"/>
<body name="foot" pos="0.20000000000000001 0 -0.34999999999999998">
<joint axis="0 -1 0" name="foot_joint" pos="-0.20000000000000001 0 0.10000000000000001" range="-45 45" type="hinge"/>
<geom friction="0.9" pos="-0.10000000000000001 0 0.10000000000000001" quat="0.70710678118654757 0 -0.70710678118654746 0" name="foot_geom" size="0.059999999999999998 0.10000000000000001" type="capsule"/>
</body>
</body>
</body>
<!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_right -->
<body name="thigh_left" pos="0 0 -0.19999999999999996">
<joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 0" range="-150 0" type="hinge"/>
<geom friction="0.9" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.050000000000000003 0.22500000000000003" pos="0 0 -0.22500000000000009" type="capsule"/>
<body name="leg_left" pos="0 0 -0.70000000000000007">
<joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.25" range="-150 0" type="hinge"/>
<geom friction="0.9" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.040000000000000001 0.25" type="capsule"/>
<body name="foot_left" pos="0.20000000000000001 0 -0.34999999999999998">
<joint axis="0 -1 0" name="foot_left_joint" pos="-0.20000000000000001 0 0.10000000000000001" range="-45 45" type="hinge"/>
<geom friction="1.9" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.059999999999999998 0.10000000000000001" pos="-0.10000000000000001 0 0.10000000000000001" type="capsule" quat="0.70710678118654757 0 -0.70710678118654746 0"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
<!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,64 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "half_cheetah.xml", 5, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, action):
xposbefore = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
xposafter = self.sim.data.qpos[0]
ob = self._get_obs()
reward_ctrl = -0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore) / self.dt
reward = reward_ctrl + reward_run
terminated = False
if self.render_mode == "human":
self.render()
return (
ob,
reward,
terminated,
False,
dict(reward_run=reward_run, reward_ctrl=reward_ctrl),
)
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos.flat[1:],
self.sim.data.qvel.flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
low=-0.1, high=0.1, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.distance = self.model.stat.extent * 0.5

View File

@ -0,0 +1,128 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
xml_file="half_cheetah.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
def step(self, action):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
observation = self._get_obs()
reward = forward_reward - ctrl_cost
terminated = False
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
"reward_run": forward_reward,
"reward_ctrl": -ctrl_cost,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,244 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the work by P. Wawrzyński in
["A Cat-Like Robot Real-Time Learning to Run"](http://staff.elka.pw.edu.pl/~pwawrzyn/pub-s/0812_LSCLRR.pdf).
The HalfCheetah is a 2-dimensional robot consisting of 9 body parts and 8
joints connecting them (including two paws). The goal is to apply a torque
on the joints to make the cheetah run forward (right) as fast as possible,
with a positive reward allocated based on the distance moved forward and a
negative reward allocated for moving backward. The torso and head of the
cheetah are fixed, and the torque can only be applied on the other 6 joints
over the front and back thighs (connecting to the torso), shins
(connecting to the thighs) and feet (connecting to the shins).
## Action Space
The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
| --- | --------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
| 0 | Torque applied on the back thigh rotor | -1 | 1 | bthigh | hinge | torque (N m) |
| 1 | Torque applied on the back shin rotor | -1 | 1 | bshin | hinge | torque (N m) |
| 2 | Torque applied on the back foot rotor | -1 | 1 | bfoot | hinge | torque (N m) |
| 3 | Torque applied on the front thigh rotor | -1 | 1 | fthigh | hinge | torque (N m) |
| 4 | Torque applied on the front shin rotor | -1 | 1 | fshin | hinge | torque (N m) |
| 5 | Torque applied on the front foot rotor | -1 | 1 | ffoot | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the
cheetah, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities.
By default, observations do not include the cheetah's `rootx`. It may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be a `Box(-Inf, Inf, (18,), float64)` where the first element
represents the `rootx`.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the
will be returned in `info` with key `"x_position"`.
However, by default, the observation is a `Box(-Inf, Inf, (17,), float64)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
| 0 | z-coordinate of the front tip | -Inf | Inf | rootz | slide | position (m) |
| 1 | angle of the front tip | -Inf | Inf | rooty | hinge | angle (rad) |
| 2 | angle of the second rotor | -Inf | Inf | bthigh | hinge | angle (rad) |
| 3 | angle of the second rotor | -Inf | Inf | bshin | hinge | angle (rad) |
| 4 | velocity of the tip along the x-axis | -Inf | Inf | bfoot | hinge | angle (rad) |
| 5 | velocity of the tip along the y-axis | -Inf | Inf | fthigh | hinge | angle (rad) |
| 6 | angular velocity of front tip | -Inf | Inf | fshin | hinge | angle (rad) |
| 7 | angular velocity of second rotor | -Inf | Inf | ffoot | hinge | angle (rad) |
| 8 | x-coordinate of the front tip | -Inf | Inf | rootx | slide | velocity (m/s) |
| 9 | y-coordinate of the front tip | -Inf | Inf | rootz | slide | velocity (m/s) |
| 10 | angle of the front tip | -Inf | Inf | rooty | hinge | angular velocity (rad/s) |
| 11 | angle of the second rotor | -Inf | Inf | bthigh | hinge | angular velocity (rad/s) |
| 12 | angle of the second rotor | -Inf | Inf | bshin | hinge | angular velocity (rad/s) |
| 13 | velocity of the tip along the x-axis | -Inf | Inf | bfoot | hinge | angular velocity (rad/s) |
| 14 | velocity of the tip along the y-axis | -Inf | Inf | fthigh | hinge | angular velocity (rad/s) |
| 15 | angular velocity of front tip | -Inf | Inf | fshin | hinge | angular velocity (rad/s) |
| 16 | angular velocity of second rotor | -Inf | Inf | ffoot | hinge | angular velocity (rad/s) |
| excluded | x-coordinate of the front tip | -Inf | Inf | rootx | slide | position (m) |
## Rewards
The reward consists of two parts:
- *forward_reward*: A reward of moving forward which is measured
as *`forward_reward_weight` * (x-coordinate before action - x-coordinate after action)/dt*. *dt* is
the time between actions and is dependent on the frame_skip parameter
(fixed to 5), where the frametime is 0.01 - making the
default *dt = 5 * 0.01 = 0.05*. This reward would be positive if the cheetah
runs forward (right).
- *ctrl_cost*: A cost for penalising the cheetah if it takes
actions that are too large. It is measured as *`ctrl_cost_weight` *
sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
control and has a default value of 0.1
The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
## Starting State
All observations start in state (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,) with a noise added to the
initial state for stochasticity. As seen before, the first 8 values in the
state are positional and the last 9 values are velocity. A uniform noise in
the range of [-`reset_noise_scale`, `reset_noise_scale`] is added to the positional values while a standard
normal noise with a mean of 0 and standard deviation of `reset_noise_scale` is added to the
initial velocity values of all zeros.
## Episode End
The episode truncates when the episode length is greater than 1000.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
env = gym.make('HalfCheetah-v2')
```
v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('HalfCheetah-v4', ctrl_cost_weight=0.1, ....)
```
| Parameter | Type | Default | Description |
| -------------------------------------------- | --------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `xml_file` | **str** | `"half_cheetah.xml"` | Path to a MuJoCo model |
| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
| `ctrl_cost_weight` | **float** | `0.1` | Weight for _ctrl_cost_ weight (see section on reward) |
| `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
forward_reward_weight,
ctrl_cost_weight,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
)
MujocoEnv.__init__(
self,
"half_cheetah.xml",
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
def step(self, action):
x_position_before = self.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
observation = self._get_obs()
reward = forward_reward - ctrl_cost
terminated = False
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
"reward_run": forward_reward,
"reward_ctrl": -ctrl_cost,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = self.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation

View File

@ -0,0 +1,67 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class HopperEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "hopper.xml", 4, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
posbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
s = self.state_vector()
terminated = not (
np.isfinite(s).all()
and (np.abs(s[2:]) < 100).all()
and (height > 0.7)
and (abs(ang) < 0.2)
)
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def _get_obs(self):
return np.concatenate(
[self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 2
self.viewer.cam.distance = self.model.stat.extent * 0.75
self.viewer.cam.lookat[2] = 1.15
self.viewer.cam.elevation = -20

View File

@ -0,0 +1,178 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 2,
"distance": 3.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class HopperEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(
self,
xml_file="hopper.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float("inf")),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_state_range,
healthy_z_range,
healthy_angle_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_state_range = healthy_state_range
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 4, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def is_healthy(self):
z, angle = self.sim.data.qpos[1:3]
state = self.state_vector()[2:]
min_state, max_state = self._healthy_state_range
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
is_healthy = all((healthy_state, healthy_z, healthy_angle))
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def step(self, action):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,298 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 2,
"distance": 3.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class HopperEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the work done by Erez, Tassa, and Todorov in
["Infinite Horizon Model Predictive Control for Nonlinear Periodic Tasks"](http://www.roboticsproceedings.org/rss07/p10.pdf). The environment aims to
increase the number of independent state and control variables as compared to
the classic control environments. The hopper is a two-dimensional
one-legged figure that consist of four main body parts - the torso at the
top, the thigh in the middle, the leg in the bottom, and a single foot on
which the entire body rests. The goal is to make hops that move in the
forward (right) direction by applying torques on the three hinges
connecting the four body parts.
## Action Space
The action space is a `Box(-1, 1, (3,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
| 0 | Torque applied on the thigh rotor | -1 | 1 | thigh_joint | hinge | torque (N m) |
| 1 | Torque applied on the leg rotor | -1 | 1 | leg_joint | hinge | torque (N m) |
| 2 | Torque applied on the foot rotor | -1 | 1 | foot_joint | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the
hopper, followed by the velocities of those individual parts
(their derivatives) with all the positions ordered before all the velocities.
By default, observations do not include the x-coordinate of the hopper. It may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be `Box(-Inf, Inf, (12,), float64)` where the first observation
represents the x-coordinate of the hopper.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
will be returned in `info` with key `"x_position"`.
However, by default, the observation is a `Box(-Inf, Inf, (11,), float64)` where the elements
correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | -------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
| 0 | z-coordinate of the torso (height of hopper) | -Inf | Inf | rootz | slide | position (m) |
| 1 | angle of the torso | -Inf | Inf | rooty | hinge | angle (rad) |
| 2 | angle of the thigh joint | -Inf | Inf | thigh_joint | hinge | angle (rad) |
| 3 | angle of the leg joint | -Inf | Inf | leg_joint | hinge | angle (rad) |
| 4 | angle of the foot joint | -Inf | Inf | foot_joint | hinge | angle (rad) |
| 5 | velocity of the x-coordinate of the torso | -Inf | Inf | rootx | slide | velocity (m/s) |
| 6 | velocity of the z-coordinate (height) of the torso | -Inf | Inf | rootz | slide | velocity (m/s) |
| 7 | angular velocity of the angle of the torso | -Inf | Inf | rooty | hinge | angular velocity (rad/s) |
| 8 | angular velocity of the thigh hinge | -Inf | Inf | thigh_joint | hinge | angular velocity (rad/s) |
| 9 | angular velocity of the leg hinge | -Inf | Inf | leg_joint | hinge | angular velocity (rad/s) |
| 10 | angular velocity of the foot hinge | -Inf | Inf | foot_joint | hinge | angular velocity (rad/s) |
| excluded | x-coordinate of the torso | -Inf | Inf | rootx | slide | position (m) |
## Rewards
The reward consists of three parts:
- *healthy_reward*: Every timestep that the hopper is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward`.
- *forward_reward*: A reward of hopping forward which is measured
as *`forward_reward_weight` * (x-coordinate before action - x-coordinate after action)/dt*. *dt* is
the time between actions and is dependent on the frame_skip parameter
(fixed to 4), where the frametime is 0.002 - making the
default *dt = 4 * 0.002 = 0.008*. This reward would be positive if the hopper
hops forward (positive x direction).
- *ctrl_cost*: A cost for penalising the hopper if it takes
actions that are too large. It is measured as *`ctrl_cost_weight` *
sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
control and has a default value of 0.001
The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
## Starting State
All observations start in state
(0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise
in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity.
## Episode End
The hopper is said to be unhealthy if any of the following happens:
1. An element of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) is no longer contained in the closed interval specified by the argument `healthy_state_range`
2. The height of the hopper (`observation[0]` if `exclude_current_positions_from_observation=True`, else `observation[1]`) is no longer contained in the closed interval specified by the argument `healthy_z_range` (usually meaning that it has fallen)
3. The angle (`observation[1]` if `exclude_current_positions_from_observation=True`, else `observation[2]`) is no longer contained in the closed interval specified by the argument `healthy_angle_range`
If `terminate_when_unhealthy=True` is passed during construction (which is the default),
the episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 1000 timesteps
2. Termination: The hopper is unhealthy
If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
env = gym.make('Hopper-v2')
```
v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('Hopper-v4', ctrl_cost_weight=0.1, ....)
```
| Parameter | Type | Default | Description |
| -------------------------------------------- | --------- | --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `xml_file` | **str** | `"hopper.xml"` | Path to a MuJoCo model |
| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
| `ctrl_cost_weight` | **float** | `0.001` | Weight for _ctrl_cost_ reward (see section on reward) |
| `healthy_reward` | **float** | `1` | Constant reward given if the ant is "healthy" after timestep |
| `terminate_when_unhealthy` | **bool** | `True` | If true, issue a done signal if the hopper is no longer healthy |
| `healthy_state_range` | **tuple** | `(-100, 100)` | The elements of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) must be in this range for the hopper to be considered healthy |
| `healthy_z_range` | **tuple** | `(0.7, float("inf"))` | The z-coordinate must be in this range for the hopper to be considered healthy |
| `healthy_angle_range` | **tuple** | `(-0.2, 0.2)` | The angle given by `observation[1]` (if `exclude_current_positions_from_observation=True`, else `observation[2]`) must be in this range for the hopper to be considered healthy |
| `reset_noise_scale` | **float** | `5e-3` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(
self,
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float("inf")),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_state_range,
healthy_z_range,
healthy_angle_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_state_range = healthy_state_range
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64
)
MujocoEnv.__init__(
self,
"hopper.xml",
4,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def is_healthy(self):
z, angle = self.data.qpos[1:3]
state = self.state_vector()[2:]
min_state, max_state = self._healthy_state_range
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
is_healthy = all((healthy_state, healthy_z, healthy_angle))
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = np.clip(self.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def step(self, action):
x_position_before = self.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation

View File

@ -0,0 +1,94 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, 1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, "humanoid.xml", 5, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def _get_obs(self):
data = self.sim.data
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
pos_before = mass_center(self.model, self.sim)
self.do_simulation(a, self.frame_skip)
pos_after = mass_center(self.model, self.sim)
alive_bonus = 5.0
data = self.sim.data
lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
qpos = self.sim.data.qpos
terminated = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,
terminated,
False,
dict(
reward_linvel=lin_vel_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_alive=alive_bonus,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 1
self.viewer.cam.distance = self.model.stat.extent * 1.0
self.viewer.cam.lookat[2] = 2.0
self.viewer.cam.elevation = -20

View File

@ -0,0 +1,200 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 2.0)),
"elevation": -20.0,
}
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, axis=1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()
class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(
self,
xml_file="humanoid.xml",
forward_reward_weight=1.25,
ctrl_cost_weight=0.1,
contact_cost_weight=5e-7,
contact_cost_range=(-np.inf, 10.0),
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
contact_cost_weight,
contact_cost_range,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
self._contact_cost_range = contact_cost_range
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(self.sim.data.ctrl))
return control_cost
@property
def contact_cost(self):
contact_forces = self.sim.data.cfrc_ext
contact_cost = self._contact_cost_weight * np.sum(np.square(contact_forces))
min_cost, max_cost = self._contact_cost_range
contact_cost = np.clip(contact_cost, min_cost, max_cost)
return contact_cost
@property
def is_healthy(self):
min_z, max_z = self._healthy_z_range
is_healthy = min_z < self.sim.data.qpos[2] < max_z
return is_healthy
@property
def terminated(self):
terminated = (not self.is_healthy) if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
com_inertia = self.sim.data.cinert.flat.copy()
com_velocity = self.sim.data.cvel.flat.copy()
actuator_forces = self.sim.data.qfrc_actuator.flat.copy()
external_contact_forces = self.sim.data.cfrc_ext.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
return np.concatenate(
(
position,
velocity,
com_inertia,
com_velocity,
actuator_forces,
external_contact_forces,
)
)
def step(self, action):
xy_position_before = mass_center(self.model, self.sim)
self.do_simulation(action, self.frame_skip)
xy_position_after = mass_center(self.model, self.sim)
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
ctrl_cost = self.control_cost(action)
contact_cost = self.contact_cost
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost + contact_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"reward_linvel": forward_reward,
"reward_quadctrl": -ctrl_cost,
"reward_alive": healthy_reward,
"reward_impact": -contact_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@ -0,0 +1,422 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 2.0)),
"elevation": -20.0,
}
def mass_center(model, data):
mass = np.expand_dims(model.body_mass, axis=1)
xpos = data.xipos
return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()
class HumanoidEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the environment introduced by Tassa, Erez and Todorov
in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025).
The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a pair of
legs and arms. The legs each consist of three body parts, and the arms 2 body parts (representing the knees and
elbows respectively). The goal of the environment is to walk forward as fast as possible without falling over.
## Action Space
The action space is a `Box(-1, 1, (17,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|----------------------|---------------|----------------|---------------------------------------|-------|------|
| 0 | Torque applied on the hinge in the y-coordinate of the abdomen | -0.4 | 0.4 | abdomen_y | hinge | torque (N m) |
| 1 | Torque applied on the hinge in the z-coordinate of the abdomen | -0.4 | 0.4 | abdomen_z | hinge | torque (N m) |
| 2 | Torque applied on the hinge in the x-coordinate of the abdomen | -0.4 | 0.4 | abdomen_x | hinge | torque (N m) |
| 3 | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh) | hinge | torque (N m) |
| 4 | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh) | hinge | torque (N m) |
| 5 | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh) | hinge | torque (N m) |
| 6 | Torque applied on the rotor between the right hip/thigh and the right shin | -0.4 | 0.4 | right_knee | hinge | torque (N m) |
| 7 | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) | -0.4 | 0.4 | left_hip_x (left_thigh) | hinge | torque (N m) |
| 8 | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) | -0.4 | 0.4 | left_hip_z (left_thigh) | hinge | torque (N m) |
| 9 | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) | -0.4 | 0.4 | left_hip_y (left_thigh) | hinge | torque (N m) |
| 10 | Torque applied on the rotor between the left hip/thigh and the left shin | -0.4 | 0.4 | left_knee | hinge | torque (N m) |
| 11 | Torque applied on the rotor between the torso and right upper arm (coordinate -1) | -0.4 | 0.4 | right_shoulder1 | hinge | torque (N m) |
| 12 | Torque applied on the rotor between the torso and right upper arm (coordinate -2) | -0.4 | 0.4 | right_shoulder2 | hinge | torque (N m) |
| 13 | Torque applied on the rotor between the right upper arm and right lower arm | -0.4 | 0.4 | right_elbow | hinge | torque (N m) |
| 14 | Torque applied on the rotor between the torso and left upper arm (coordinate -1) | -0.4 | 0.4 | left_shoulder1 | hinge | torque (N m) |
| 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) |
| 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the Humanoid,
followed by the velocities of those individual parts (their derivatives) with all the
positions ordered before all the velocities.
By default, observations do not include the x- and y-coordinates of the torso. These may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be a `Box(-Inf, Inf, (378,), float64)` where the first two observations
represent the x- and y-coordinates of the torso.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
However, by default, the observation is a `Box(-Inf, Inf, (376,), float64)`. The elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- |
| 0 | z-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
| 1 | x-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 2 | y-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 3 | z-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 4 | w-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 5 | z-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angle (rad) |
| 6 | y-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angle (rad) |
| 7 | x-angle of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angle (rad) |
| 8 | x-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angle (rad) |
| 9 | z-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angle (rad) |
| 10 | y-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angle (rad) |
| 11 | angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angle (rad) |
| 12 | x-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angle (rad) |
| 13 | z-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angle (rad) |
| 14 | y-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angle (rad) |
| 15 | angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angle (rad) |
| 16 | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angle (rad) |
| 17 | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angle (rad) |
| 18 | angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angle (rad) |
| 19 | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angle (rad) |
| 20 | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angle (rad) |
| 21 | angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angle (rad) |
| 22 | x-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 23 | y-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 24 | z-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 25 | x-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 26 | y-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 27 | z-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 28 | z-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | anglular velocity (rad/s) |
| 29 | y-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | anglular velocity (rad/s) |
| 30 | x-coordinate of angular velocity of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | aanglular velocity (rad/s) |
| 31 | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | anglular velocity (rad/s) |
| 32 | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | anglular velocity (rad/s) |
| 33 | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | anglular velocity (rad/s) |
| 34 | angular velocity of the angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | anglular velocity (rad/s) |
| 35 | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | anglular velocity (rad/s) |
| 36 | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | anglular velocity (rad/s) |
| 37 | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | anglular velocity (rad/s) |
| 38 | angular velocity of the angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | anglular velocity (rad/s) |
| 39 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | anglular velocity (rad/s) |
| 40 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | anglular velocity (rad/s) |
| 41 | angular velocity of the angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | anglular velocity (rad/s) |
| 42 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | anglular velocity (rad/s) |
| 43 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | anglular velocity (rad/s) |
| 44 | angular velocity of the angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | anglular velocity (rad/s) |
| excluded | x-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
| excluded | y-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
Additionally, after all the positional and velocity based values in the table,
the observation contains (in order):
- *cinert:* Mass and inertia of a single rigid body relative to the center of mass
(this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*)
and hence adds to another 140 elements in the state space.
- *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence
adds another 84 elements in the state space
- *qfrc_actuator:* Constraint force generated as the actuator force. This has shape
`(23,)` *(nv * 1)* and hence adds another 23 elements to the state space.
- *cfrc_ext:* This is the center of mass based external force on the body. It has shape
14 * 6 (*nbody * 6*) and hence adds to another 84 elements in the state space.
where *nbody* stands for the number of bodies in the robot and *nv* stands for the
number of degrees of freedom (*= dim(qvel)*)
The body parts are:
| id (for `v2`,`v3`,`v4`) | body part |
| --- | ------------ |
| 0 | worldBody (note: all values are constant 0) |
| 1 | torso |
| 2 | lwaist |
| 3 | pelvis |
| 4 | right_thigh |
| 5 | right_sin |
| 6 | right_foot |
| 7 | left_thigh |
| 8 | left_sin |
| 9 | left_foot |
| 10 | right_upper_arm |
| 11 | right_lower_arm |
| 12 | left_upper_arm |
| 13 | left_lower_arm |
The joints are:
| id (for `v2`,`v3`,`v4`) | joint |
| --- | ------------ |
| 0 | root |
| 1 | root |
| 2 | root |
| 3 | root |
| 4 | root |
| 5 | root |
| 6 | abdomen_z |
| 7 | abdomen_y |
| 8 | abdomen_x |
| 9 | right_hip_x |
| 10 | right_hip_z |
| 11 | right_hip_y |
| 12 | right_knee |
| 13 | left_hip_x |
| 14 | left_hiz_z |
| 15 | left_hip_y |
| 16 | left_knee |
| 17 | right_shoulder1 |
| 18 | right_shoulder2 |
| 19 | right_elbow|
| 20 | left_shoulder1 |
| 21 | left_shoulder2 |
| 22 | left_elfbow |
The (x,y,z) coordinates are translational DOFs while the orientations are rotational
DOFs expressed as quaternions. One can read more about free joints on the
[Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
**Note:** Humanoid-v4 environment no longer has the following contact forces issue.
If using previous Humanoid versions from v4, there have been reported issues that using a Mujoco-Py version > 2.0
results in the contact forces always being 0. As such we recommend to use a Mujoco-Py
version < 2.0 when using the Humanoid environment if you would like to report results
with contact forces (if contact forces are not used in your experiments, you can use
version > 2.0).
## Rewards
The reward consists of three parts:
- *healthy_reward*: Every timestep that the humanoid is alive (see section Episode Termination for definition), it gets a reward of fixed value `healthy_reward`
- *forward_reward*: A reward of walking forward which is measured as *`forward_reward_weight` *
(average center of mass before action - average center of mass after action)/dt*.
*dt* is the time between actions and is dependent on the frame_skip parameter
(default is 5), where the frametime is 0.003 - making the default *dt = 5 * 0.003 = 0.015*.
This reward would be positive if the humanoid walks forward (in positive x-direction). The calculation
for the center of mass is defined in the `.py` file for the Humanoid.
- *ctrl_cost*: A negative reward for penalising the humanoid if it has too
large of a control force. If there are *nu* actuators/controls, then the control has
shape `nu x 1`. It is measured as *`ctrl_cost_weight` * sum(control<sup>2</sup>)*.
- *contact_cost*: A negative reward for penalising the humanoid if the external
contact force is too large. It is calculated by clipping
*`contact_cost_weight` * sum(external contact force<sup>2</sup>)* to the interval specified by `contact_cost_range`.
The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost* and `info` will also contain the individual reward terms
## Starting State
All observations start in state
(0.0, 0.0, 1.4, 1.0, 0.0 ... 0.0) with a uniform noise in the range
of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional and velocity values (values in the table)
for stochasticity. Note that the initial z coordinate is intentionally
selected to be high, thereby indicating a standing up humanoid. The initial
orientation is designed to make it face forward as well.
## Episode End
The humanoid is said to be unhealthy if the z-position of the torso is no longer contained in the
closed interval specified by the argument `healthy_z_range`.
If `terminate_when_unhealthy=True` is passed during construction (which is the default),
the episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 1000 timesteps
3. Termination: The humanoid is unhealthy
If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
## Arguments
No additional arguments are currently supported in v2 and lower.
```python
import gymnasium as gym
env = gym.make('Humanoid-v4')
```
v3 and v4 take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('Humanoid-v4', ctrl_cost_weight=0.1, ....)
```
| Parameter | Type | Default | Description |
| -------------------------------------------- | --------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `xml_file` | **str** | `"humanoid.xml"` | Path to a MuJoCo model |
| `forward_reward_weight` | **float** | `1.25` | Weight for _forward_reward_ term (see section on reward) |
| `ctrl_cost_weight` | **float** | `0.1` | Weight for _ctrl_cost_ term (see section on reward) |
| `contact_cost_weight` | **float** | `5e-7` | Weight for _contact_cost_ term (see section on reward) |
| `healthy_reward` | **float** | `5.0` | Constant reward given if the humanoid is "healthy" after timestep |
| `terminate_when_unhealthy` | **bool** | `True` | If true, issue a done signal if the z-coordinate of the torso is no longer in the `healthy_z_range` |
| `healthy_z_range` | **tuple** | `(1.0, 2.0)` | The humanoid is considered healthy if the z-coordinate of the torso is in this range |
| `reset_noise_scale` | **float** | `1e-2` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(
self,
forward_reward_weight=1.25,
ctrl_cost_weight=0.1,
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64
)
MujocoEnv.__init__(
self,
"humanoid.xml",
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(self.data.ctrl))
return control_cost
@property
def is_healthy(self):
min_z, max_z = self._healthy_z_range
is_healthy = min_z < self.data.qpos[2] < max_z
return is_healthy
@property
def terminated(self):
terminated = (not self.is_healthy) if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.data.qpos.flat.copy()
velocity = self.data.qvel.flat.copy()
com_inertia = self.data.cinert.flat.copy()
com_velocity = self.data.cvel.flat.copy()
actuator_forces = self.data.qfrc_actuator.flat.copy()
external_contact_forces = self.data.cfrc_ext.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
return np.concatenate(
(
position,
velocity,
com_inertia,
com_velocity,
actuator_forces,
external_contact_forces,
)
)
def step(self, action):
xy_position_before = mass_center(self.model, self.data)
self.do_simulation(action, self.frame_skip)
xy_position_after = mass_center(self.model, self.data)
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
observation = self._get_obs()
reward = rewards - ctrl_cost
terminated = self.terminated
info = {
"reward_linvel": forward_reward,
"reward_quadctrl": -ctrl_cost,
"reward_alive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation

View File

@ -0,0 +1,87 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self,
"humanoidstandup.xml",
5,
observation_space=observation_space,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def _get_obs(self):
data = self.sim.data
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
self.do_simulation(a, self.frame_skip)
pos_after = self.sim.data.qpos[2]
data = self.sim.data
uph_cost = (pos_after - 0) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,
False,
False,
dict(
reward_linup=uph_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 1
self.viewer.cam.distance = self.model.stat.extent * 1.0
self.viewer.cam.lookat[2] = 0.8925
self.viewer.cam.elevation = -20

View File

@ -0,0 +1,319 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 0.8925)),
"elevation": -20.0,
}
class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is based on the environment introduced by Tassa, Erez and Todorov
in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025).
The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a
pair of legs and arms. The legs each consist of two links, and so the arms (representing the
knees and elbows respectively). The environment starts with the humanoid laying on the ground,
and then the goal of the environment is to make the humanoid standup and then keep it standing
by applying torques on the various hinges.
## Action Space
The agent take a 17-element vector for actions.
The action space is a continuous `(action, ...)` all in `[-1, 1]`, where `action`
represents the numerical torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ---------------------------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
| 0 | Torque applied on the hinge in the y-coordinate of the abdomen | -0.4 | 0.4 | abdomen_y | hinge | torque (N m) |
| 1 | Torque applied on the hinge in the z-coordinate of the abdomen | -0.4 | 0.4 | abdomen_z | hinge | torque (N m) |
| 2 | Torque applied on the hinge in the x-coordinate of the abdomen | -0.4 | 0.4 | abdomen_x | hinge | torque (N m) |
| 3 | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh) | hinge | torque (N m) |
| 4 | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh) | hinge | torque (N m) |
| 5 | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh) | hinge | torque (N m) |
| 6 | Torque applied on the rotor between the right hip/thigh and the right shin | -0.4 | 0.4 | right_knee | hinge | torque (N m) |
| 7 | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) | -0.4 | 0.4 | left_hip_x (left_thigh) | hinge | torque (N m) |
| 8 | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) | -0.4 | 0.4 | left_hip_z (left_thigh) | hinge | torque (N m) |
| 9 | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) | -0.4 | 0.4 | left_hip_y (left_thigh) | hinge | torque (N m) |
| 10 | Torque applied on the rotor between the left hip/thigh and the left shin | -0.4 | 0.4 | left_knee | hinge | torque (N m) |
| 11 | Torque applied on the rotor between the torso and right upper arm (coordinate -1) | -0.4 | 0.4 | right_shoulder1 | hinge | torque (N m) |
| 12 | Torque applied on the rotor between the torso and right upper arm (coordinate -2) | -0.4 | 0.4 | right_shoulder2 | hinge | torque (N m) |
| 13 | Torque applied on the rotor between the right upper arm and right lower arm | -0.4 | 0.4 | right_elbow | hinge | torque (N m) |
| 14 | Torque applied on the rotor between the torso and left upper arm (coordinate -1) | -0.4 | 0.4 | left_shoulder1 | hinge | torque (N m) |
| 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) |
| 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) |
## Observation Space
Observations consist of positional values of different body parts of the Humanoid,
followed by the velocities of those individual parts (their derivatives) with all the
positions ordered before all the velocities.
By default, observations do not include the x- and y-coordinates of the torso. These may
be included by passing `exclude_current_positions_from_observation=False` during construction.
In that case, the observation space will be a `Box(-Inf, Inf, (378,), float64)` where the first two observations
represent the x- and y-coordinates of the torso.
Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
However, by default, the observation is a `Box(-Inf, Inf, (376,), float64)`. The elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- |
| 0 | z-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
| 1 | x-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 2 | y-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 3 | z-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 4 | w-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
| 5 | z-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angle (rad) |
| 6 | y-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angle (rad) |
| 7 | x-angle of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angle (rad) |
| 8 | x-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angle (rad) |
| 9 | z-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angle (rad) |
| 10 | y-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angle (rad) |
| 11 | angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angle (rad) |
| 12 | x-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angle (rad) |
| 13 | z-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angle (rad) |
| 14 | y-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angle (rad) |
| 15 | angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angle (rad) |
| 16 | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angle (rad) |
| 17 | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angle (rad) |
| 18 | angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angle (rad) |
| 19 | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angle (rad) |
| 20 | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angle (rad) |
| 21 | angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angle (rad) |
| 22 | x-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 23 | y-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 24 | z-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
| 25 | x-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 26 | y-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 27 | z-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) |
| 28 | z-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | anglular velocity (rad/s) |
| 29 | y-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | anglular velocity (rad/s) |
| 30 | x-coordinate of angular velocity of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | aanglular velocity (rad/s) |
| 31 | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | anglular velocity (rad/s) |
| 32 | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | anglular velocity (rad/s) |
| 33 | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | anglular velocity (rad/s) |
| 34 | angular velocity of the angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | anglular velocity (rad/s) |
| 35 | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | anglular velocity (rad/s) |
| 36 | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | anglular velocity (rad/s) |
| 37 | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | anglular velocity (rad/s) |
| 38 | angular velocity of the angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | anglular velocity (rad/s) |
| 39 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | anglular velocity (rad/s) |
| 40 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | anglular velocity (rad/s) |
| 41 | angular velocity of the angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | anglular velocity (rad/s) |
| 42 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | anglular velocity (rad/s) |
| 43 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | anglular velocity (rad/s) |
| 44 | angular velocity of the angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | anglular velocity (rad/s) |
| excluded | x-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
| excluded | y-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
Additionally, after all the positional and velocity based values in the table,
the observation contains (in order):
- *cinert:* Mass and inertia of a single rigid body relative to the center of mass
(this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*)
and hence adds to another 140 elements in the state space.
- *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence
adds another 84 elements in the state space
- *qfrc_actuator:* Constraint force generated as the actuator force. This has shape
`(23,)` *(nv * 1)* and hence adds another 23 elements to the state space.
- *cfrc_ext:* This is the center of mass based external force on the body. It has shape
14 * 6 (*nbody * 6*) and hence adds to another 84 elements in the state space.
where *nbody* stands for the number of bodies in the robot and *nv* stands for the
number of degrees of freedom (*= dim(qvel)*)
The body parts are:
| id (for `v2`,`v3`,`v4`) | body part |
| --- | ------------ |
| 0 | worldBody (note: all values are constant 0) |
| 1 | torso |
| 2 | lwaist |
| 3 | pelvis |
| 4 | right_thigh |
| 5 | right_sin |
| 6 | right_foot |
| 7 | left_thigh |
| 8 | left_sin |
| 9 | left_foot |
| 10 | right_upper_arm |
| 11 | right_lower_arm |
| 12 | left_upper_arm |
| 13 | left_lower_arm |
The joints are:
| id (for `v2`,`v3`,`v4`) | joint |
| --- | ------------ |
| 0 | root |
| 1 | root |
| 2 | root |
| 3 | root |
| 4 | root |
| 5 | root |
| 6 | abdomen_z |
| 7 | abdomen_y |
| 8 | abdomen_x |
| 9 | right_hip_x |
| 10 | right_hip_z |
| 11 | right_hip_y |
| 12 | right_knee |
| 13 | left_hip_x |
| 14 | left_hiz_z |
| 15 | left_hip_y |
| 16 | left_knee |
| 17 | right_shoulder1 |
| 18 | right_shoulder2 |
| 19 | right_elbow|
| 20 | left_shoulder1 |
| 21 | left_shoulder2 |
| 22 | left_elfbow |
The (x,y,z) coordinates are translational DOFs while the orientations are rotational
DOFs expressed as quaternions. One can read more about free joints on the
[Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
**Note:** HumanoidStandup-v4 environment no longer has the following contact forces issue.
If using previous HumanoidStandup versions from v4, there have been reported issues that using a Mujoco-Py version > 2.0 results
in the contact forces always being 0. As such we recommend to use a Mujoco-Py version < 2.0
when using the Humanoid environment if you would like to report results with contact forces
(if contact forces are not used in your experiments, you can use version > 2.0).
## Rewards
The reward consists of three parts:
- *uph_cost*: A reward for moving upward (in an attempt to stand up). This is not a relative
reward which measures how much upward it has moved from the last timestep, but it is an
absolute reward which measures how much upward the Humanoid has moved overall. It is
measured as *(z coordinate after action - 0)/(atomic timestep)*, where *z coordinate after
action* is index 0 in the state/index 2 in the table, and *atomic timestep* is the time for
one frame of movement even though the simulation has a framerate of 5 (done in order to inflate
rewards a little for faster learning).
- *quad_ctrl_cost*: A negative reward for penalising the humanoid if it has too large of
a control force. If there are *nu* actuators/controls, then the control has shape `nu x 1`.
It is measured as *0.1 **x** sum(control<sup>2</sup>)*.
- *quad_impact_cost*: A negative reward for penalising the humanoid if the external
contact force is too large. It is calculated as *min(0.5 * 0.000001 * sum(external
contact force<sup>2</sup>), 10)*.
The total reward returned is ***reward*** *=* *uph_cost + 1 - quad_ctrl_cost - quad_impact_cost*
## Starting State
All observations start in state
(0.0, 0.0, 0.105, 1.0, 0.0 ... 0.0) with a uniform noise in the range of
[-0.01, 0.01] added to the positional and velocity values (values in the table)
for stochasticity. Note that the initial z coordinate is intentionally selected
to be low, thereby indicating a laying down humanoid. The initial orientation is
designed to make it face forward as well.
## Episode End
The episode ends when any of the following happens:
1. Truncation: The episode duration reaches a 1000 timesteps
2. Termination: Any of the state space values is no longer finite
## Arguments
No additional arguments are currently supported.
```python
import gymnasium as gym
env = gym.make('HumanoidStandup-v4')
```
There is no v3 for HumanoidStandup, unlike the robot environments where a v3 and
beyond take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
```python
import gymnasium as gym
env = gym.make('HumanoidStandup-v2')
```
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
MujocoEnv.__init__(
self,
"humanoidstandup.xml",
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def _get_obs(self):
data = self.data
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
self.do_simulation(a, self.frame_skip)
pos_after = self.data.qpos[2]
data = self.data
uph_cost = (pos_after - 0) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,
False,
False,
dict(
reward_linup=uph_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()

View File

@ -0,0 +1,69 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MuJocoPyEnv.__init__(
self,
"inverted_double_pendulum.xml",
5,
observation_space=observation_space,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, action):
self.do_simulation(action, self.frame_skip)
ob = self._get_obs()
x, _, y = self.sim.data.site_xpos[0]
dist_penalty = 0.01 * x**2 + (y - 2) ** 2
v1, v2 = self.sim.data.qvel[1:3]
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
terminated = bool(y <= 1)
if self.render_mode == "human":
self.render()
return ob, r, terminated, False, {}
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos[:1], # cart x pos
np.sin(self.sim.data.qpos[1:]), # link angles
np.cos(self.sim.data.qpos[1:]),
np.clip(self.sim.data.qvel, -10, 10),
np.clip(self.sim.data.qfrc_constraint, -10, 10),
]
).ravel()
def reset_model(self):
self.set_state(
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1,
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
v = self.viewer
v.cam.trackbodyid = 0
v.cam.distance = self.model.stat.extent * 0.5
v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]

View File

@ -0,0 +1,179 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 0,
"distance": 4.1225,
"lookat": np.array((0.0, 0.0, 0.12250000000000005)),
}
class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment originates from control theory and builds on the cartpole
environment based on the work done by Barto, Sutton, and Anderson in
["Neuronlike adaptive elements that can solve difficult learning control problems"](https://ieeexplore.ieee.org/document/6313077),
powered by the Mujoco physics simulator - allowing for more complex experiments
(such as varying the effects of gravity or constraints). This environment involves a cart that can
moved linearly, with a pole fixed on it and a second pole fixed on the other end of the first one
(leaving the second pole as the only one with one free end). The cart can be pushed left or right,
and the goal is to balance the second pole on top of the first pole, which is in turn on top of the
cart, by applying continuous forces on the cart.
## Action Space
The agent take a 1-element vector for actions.
The action space is a continuous `(action)` in `[-1, 1]`, where `action` represents the
numerical force applied to the cart (with magnitude representing the amount of force and
sign representing the direction)
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------|
| 0 | Force applied on the cart | -1 | 1 | slider | slide | Force (N) |
## Observation Space
The state space consists of positional values of different body parts of the pendulum system,
followed by the velocities of those individual parts (their derivatives) with all the
positions ordered before all the velocities.
The observation is a `ndarray` with shape `(11,)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | ----------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
| 0 | position of the cart along the linear surface | -Inf | Inf | slider | slide | position (m) |
| 1 | sine of the angle between the cart and the first pole | -Inf | Inf | sin(hinge) | hinge | unitless |
| 2 | sine of the angle between the two poles | -Inf | Inf | sin(hinge2) | hinge | unitless |
| 3 | cosine of the angle between the cart and the first pole | -Inf | Inf | cos(hinge) | hinge | unitless |
| 4 | cosine of the angle between the two poles | -Inf | Inf | cos(hinge2) | hinge | unitless |
| 5 | velocity of the cart | -Inf | Inf | slider | slide | velocity (m/s) |
| 6 | angular velocity of the angle between the cart and the first pole | -Inf | Inf | hinge | hinge | angular velocity (rad/s) |
| 7 | angular velocity of the angle between the two poles | -Inf | Inf | hinge2 | hinge | angular velocity (rad/s) |
| 8 | constraint force - 1 | -Inf | Inf | | | Force (N) |
| 9 | constraint force - 2 | -Inf | Inf | | | Force (N) |
| 10 | constraint force - 3 | -Inf | Inf | | | Force (N) |
There is physical contact between the robots and their environment - and Mujoco
attempts at getting realistic physics simulations for the possible physical contact
dynamics by aiming for physical accuracy and computational efficiency.
There is one constraint force for contacts for each degree of freedom (3).
The approach and handling of constraints by Mujoco is unique to the simulator
and is based on their research. Once can find more information in their
[*documentation*](https://mujoco.readthedocs.io/en/latest/computation.html)
or in their paper
["Analytically-invertible dynamics with contacts and constraints: Theory and implementation in MuJoCo"](https://homes.cs.washington.edu/~todorov/papers/TodorovICRA14.pdf).
## Rewards
The reward consists of two parts:
- *alive_bonus*: The goal is to make the second inverted pendulum stand upright
(within a certain angle limit) as long as possible - as such a reward of +10 is awarded
for each timestep that the second pole is upright.
- *distance_penalty*: This reward is a measure of how far the *tip* of the second pendulum
(the only free end) moves, and it is calculated as
*0.01 * x<sup>2</sup> + (y - 2)<sup>2</sup>*, where *x* is the x-coordinate of the tip
and *y* is the y-coordinate of the tip of the second pole.
- *velocity_penalty*: A negative reward for penalising the agent if it moves too
fast *0.001 * v<sub>1</sub><sup>2</sup> + 0.005 * v<sub>2</sub> <sup>2</sup>*
The total reward returned is ***reward*** *=* *alive_bonus - distance_penalty - velocity_penalty*
## Starting State
All observations start in state
(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise in the range
of [-0.1, 0.1] added to the positional values (cart position and pole angles) and standard
normal force with a standard deviation of 0.1 added to the velocity values for stochasticity.
## Episode End
The episode ends when any of the following happens:
1.Truncation: The episode duration reaches 1000 timesteps.
2.Termination: Any of the state space values is no longer finite.
3.Termination: The y_coordinate of the tip of the second pole *is less than or equal* to 1. The maximum standing height of the system is 1.196 m when all the parts are perpendicularly vertical on top of each other).
## Arguments
No additional arguments are currently supported.
```python
import gymnasium as gym
env = gym.make('InvertedDoublePendulum-v4')
```
There is no v3 for InvertedPendulum, unlike the robot environments where a v3 and
beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('InvertedDoublePendulum-v2')
```
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum)
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MujocoEnv.__init__(
self,
"inverted_double_pendulum.xml",
5,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, action):
self.do_simulation(action, self.frame_skip)
ob = self._get_obs()
x, _, y = self.data.site_xpos[0]
dist_penalty = 0.01 * x**2 + (y - 2) ** 2
v1, v2 = self.data.qvel[1:3]
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
terminated = bool(y <= 1)
if self.render_mode == "human":
self.render()
return ob, r, terminated, False, {}
def _get_obs(self):
return np.concatenate(
[
self.data.qpos[:1], # cart x pos
np.sin(self.data.qpos[1:]), # link angles
np.cos(self.data.qpos[1:]),
np.clip(self.data.qvel, -10, 10),
np.clip(self.data.qfrc_constraint, -10, 10),
]
).ravel()
def reset_model(self):
self.set_state(
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1,
)
return self._get_obs()

View File

@ -0,0 +1,56 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
MuJocoPyEnv.__init__(
self,
"inverted_pendulum.xml",
2,
observation_space=observation_space,
**kwargs,
)
def step(self, a):
reward = 1.0
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.01, high=0.01
)
qvel = self.init_qvel + self.np_random.uniform(
size=self.model.nv, low=-0.01, high=0.01
)
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 0
self.viewer.cam.distance = self.model.stat.extent

View File

@ -0,0 +1,137 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 0,
"distance": 2.04,
}
class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
"""
## Description
This environment is the cartpole environment based on the work done by
Barto, Sutton, and Anderson in ["Neuronlike adaptive elements that can
solve difficult learning control problems"](https://ieeexplore.ieee.org/document/6313077),
just like in the classic environments but now powered by the Mujoco physics simulator -
allowing for more complex experiments (such as varying the effects of gravity).
This environment involves a cart that can moved linearly, with a pole fixed on it
at one end and having another end free. The cart can be pushed left or right, and the
goal is to balance the pole on the top of the cart by applying forces on the cart.
## Action Space
The agent take a 1-element vector for actions.
The action space is a continuous `(action)` in `[-3, 3]`, where `action` represents
the numerical force applied to the cart (with magnitude representing the amount of
force and sign representing the direction)
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
|-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------|
| 0 | Force applied on the cart | -3 | 3 | slider | slide | Force (N) |
## Observation Space
The state space consists of positional values of different body parts of
the pendulum system, followed by the velocities of those individual parts (their derivatives)
with all the positions ordered before all the velocities.
The observation is a `ndarray` with shape `(4,)` where the elements correspond to the following:
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
| --- | --------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------- |
| 0 | position of the cart along the linear surface | -Inf | Inf | slider | slide | position (m) |
| 1 | vertical angle of the pole on the cart | -Inf | Inf | hinge | hinge | angle (rad) |
| 2 | linear velocity of the cart | -Inf | Inf | slider | slide | velocity (m/s) |
| 3 | angular velocity of the pole on the cart | -Inf | Inf | hinge | hinge | anglular velocity (rad/s) |
## Rewards
The goal is to make the inverted pendulum stand upright (within a certain angle limit)
as long as possible - as such a reward of +1 is awarded for each timestep that
the pole is upright.
## Starting State
All observations start in state
(0.0, 0.0, 0.0, 0.0) with a uniform noise in the range
of [-0.01, 0.01] added to the values for stochasticity.
## Episode End
The episode ends when any of the following happens:
1. Truncation: The episode duration reaches 1000 timesteps.
2. Termination: Any of the state space values is no longer finite.
3. Termination: The absolute value of the vertical angle between the pole and the cart is greater than 0.2 radian.
## Arguments
No additional arguments are currently supported.
```python
import gymnasium as gym
env = gym.make('InvertedPendulum-v4')
```
There is no v3 for InvertedPendulum, unlike the robot environments where a
v3 and beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc.
```python
import gymnasium as gym
env = gym.make('InvertedPendulum-v2')
```
## Version History
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
* v2: All continuous control environments now use mujoco-py >= 1.50
* v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum)
* v0: Initial versions release (1.0.0)
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
MujocoEnv.__init__(
self,
"inverted_pendulum.xml",
2,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
def step(self, a):
reward = 1.0
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.01, high=0.01
)
qvel = self.init_qvel + self.np_random.uniform(
size=self.model.nv, low=-0.01, high=0.01
)
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([self.data.qpos, self.data.qvel]).ravel()

View File

@ -0,0 +1,418 @@
from os import path
from typing import Any, Dict, Optional, Tuple, Union
import numpy as np
from numpy.typing import NDArray
import gymnasium as gym
from gymnasium import error, logger, spaces
from gymnasium.spaces import Space
try:
import mujoco_py
except ImportError as e:
MUJOCO_PY_IMPORT_ERROR = e
else:
MUJOCO_PY_IMPORT_ERROR = None
try:
import mujoco
except ImportError as e:
MUJOCO_IMPORT_ERROR = e
else:
MUJOCO_IMPORT_ERROR = None
DEFAULT_SIZE = 480
class BaseMujocoEnv(gym.Env[np.float64, np.float32]):
"""Superclass for all MuJoCo environments."""
def __init__(
self,
model_path,
frame_skip,
observation_space: Space,
render_mode: Optional[str] = None,
width: int = DEFAULT_SIZE,
height: int = DEFAULT_SIZE,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
):
"""Base abstract class for mujoco based environments.
Args:
model_path: Path to the MuJoCo Model.
frame_skip: Number of MuJoCo simulation steps per gym `step()`.
observation_space: The observation space of the environment.
render_mode: The `render_mode` used.
width: The width of the render window.
height: The height of the render window.
camera_id: The camera ID used.
camera_name: The name of the camera used (can not be used in conjunction with `camera_id`).
Raises:
OSError: when the `model_path` does not exist.
error.DependencyNotInstalled: When `mujoco` is not installed.
"""
if model_path.startswith(".") or model_path.startswith("/"):
self.fullpath = model_path
elif model_path.startswith("~"):
self.fullpath = path.expanduser(model_path)
else:
self.fullpath = path.join(path.dirname(__file__), "assets", model_path)
if not path.exists(self.fullpath):
raise OSError(f"File {self.fullpath} does not exist")
self.width = width
self.height = height
# may use width and height
self.model, self.data = self._initialize_simulation()
self.init_qpos = self.data.qpos.ravel().copy()
self.init_qvel = self.data.qvel.ravel().copy()
self.frame_skip = frame_skip
assert self.metadata["render_modes"] == [
"human",
"rgb_array",
"depth_array",
], self.metadata["render_modes"]
if "render_fps" in self.metadata:
assert (
int(np.round(1.0 / self.dt)) == self.metadata["render_fps"]
), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}'
self.observation_space = observation_space
self._set_action_space()
self.render_mode = render_mode
self.camera_name = camera_name
self.camera_id = camera_id
def _set_action_space(self):
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
low, high = bounds.T
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
return self.action_space
# methods to override:
# ----------------------------
def reset_model(self) -> NDArray[np.float64]:
"""
Reset the robot degrees of freedom (qpos and qvel).
Implement this in each subclass.
"""
raise NotImplementedError
def _initialize_simulation(self) -> Tuple[Any, Any]:
"""
Initialize MuJoCo simulation data structures mjModel and mjData.
"""
raise NotImplementedError
def _reset_simulation(self) -> None:
"""
Reset MuJoCo simulation data structures, mjModel and mjData.
"""
raise NotImplementedError
def _step_mujoco_simulation(self, ctrl, n_frames) -> None:
"""
Step over the MuJoCo simulation.
"""
raise NotImplementedError
def render(self) -> Union[NDArray[np.float64], None]:
"""
Render a frame from the MuJoCo simulation as specified by the render_mode.
"""
raise NotImplementedError
# -----------------------------
def _get_reset_info(self) -> Dict[str, float]:
"""Function that generates the `info` that is returned during a `reset()`."""
return {}
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
self._reset_simulation()
ob = self.reset_model()
info = self._get_reset_info()
if self.render_mode == "human":
self.render()
return ob, info
def set_state(self, qpos, qvel) -> None:
"""
Set the joints position qpos and velocity qvel of the model. Override this method depending on the MuJoCo bindings used.
"""
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
@property
def dt(self) -> float:
return self.model.opt.timestep * self.frame_skip
def do_simulation(self, ctrl, n_frames) -> None:
"""
Step the simulation n number of frames and applying a control action.
"""
# Check control input is contained in the action space
if np.array(ctrl).shape != (self.model.nu,):
raise ValueError(
f"Action dimension mismatch. Expected {(self.model.nu,)}, found {np.array(ctrl).shape}"
)
self._step_mujoco_simulation(ctrl, n_frames)
def close(self):
"""Close all processes like rendering contexts"""
raise NotImplementedError
def get_body_com(self, body_name) -> NDArray[np.float64]:
"""Return the cartesian position of a body frame"""
raise NotImplementedError
def state_vector(self) -> NDArray[np.float64]:
"""Return the position and velocity joint states of the model"""
return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])
class MuJocoPyEnv(BaseMujocoEnv):
def __init__(
self,
model_path: str,
frame_skip: int,
observation_space: Space,
render_mode: Optional[str] = None,
width: int = DEFAULT_SIZE,
height: int = DEFAULT_SIZE,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
):
if MUJOCO_PY_IMPORT_ERROR is not None:
raise error.DependencyNotInstalled(
f"{MUJOCO_PY_IMPORT_ERROR}. "
"(HINT: you need to install mujoco-py, and also perform the setup instructions "
"here: https://github.com/openai/mujoco-py.)"
)
logger.deprecation(
"This version of the mujoco environments depends "
"on the mujoco-py bindings, which are no longer maintained "
"and may stop working. Please upgrade to the v4 versions of "
"the environments (which depend on the mujoco python bindings instead), unless "
"you are trying to precisely replicate previous works)."
)
self.viewer = None
self._viewers = {}
super().__init__(
model_path,
frame_skip,
observation_space,
render_mode,
width,
height,
camera_id,
camera_name,
)
def _initialize_simulation(self):
model = mujoco_py.load_model_from_path(self.fullpath)
self.sim = mujoco_py.MjSim(model)
data = self.sim.data
return model, data
def _reset_simulation(self):
self.sim.reset()
def set_state(self, qpos, qvel):
super().set_state(qpos, qvel)
state = self.sim.get_state()
state = mujoco_py.MjSimState(state.time, qpos, qvel, state.act, state.udd_state)
self.sim.set_state(state)
self.sim.forward()
def get_body_com(self, body_name):
return self.data.get_body_xpos(body_name)
def _step_mujoco_simulation(self, ctrl, n_frames):
self.sim.data.ctrl[:] = ctrl
for _ in range(n_frames):
self.sim.step()
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
width, height = self.width, self.height
camera_name, camera_id = self.camera_name, self.camera_id
if self.render_mode in {"rgb_array", "depth_array"}:
if camera_id is not None and camera_name is not None:
raise ValueError(
"Both `camera_id` and `camera_name` cannot be"
" specified at the same time."
)
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
camera_name = "track"
if camera_id is None and camera_name in self.model._camera_name2id:
if camera_name in self.model._camera_name2id:
camera_id = self.model.camera_name2id(camera_name)
self._get_viewer(self.render_mode).render(
width, height, camera_id=camera_id
)
if self.render_mode == "rgb_array":
data = self._get_viewer(self.render_mode).read_pixels(
width, height, depth=False
)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif self.render_mode == "depth_array":
self._get_viewer(self.render_mode).render(width, height)
# Extract depth part of the read_pixels() tuple
data = self._get_viewer(self.render_mode).read_pixels(
width, height, depth=True
)[1]
# original image is upside-down, so flip it
return data[::-1, :]
elif self.render_mode == "human":
self._get_viewer(self.render_mode).render()
def _get_viewer(
self, mode
) -> Union["mujoco_py.MjViewer", "mujoco_py.MjRenderContextOffscreen"]:
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == "human":
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode in {"rgb_array", "depth_array"}:
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
else:
raise AttributeError(
f"Unknown mode: {mode}, expected modes: {self.metadata['render_modes']}"
)
self.viewer_setup()
self._viewers[mode] = self.viewer
return self.viewer
def close(self):
if self.viewer is not None:
self.viewer = None
self._viewers = {}
def viewer_setup(self):
"""
This method is called when the viewer is initialized.
Optionally implement this method, if you need to tinker with camera position and so forth.
"""
raise NotImplementedError
class MujocoEnv(BaseMujocoEnv):
"""Superclass for MuJoCo environments."""
def __init__(
self,
model_path,
frame_skip,
observation_space: Space,
render_mode: Optional[str] = None,
width: int = DEFAULT_SIZE,
height: int = DEFAULT_SIZE,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
default_camera_config: Optional[dict] = None,
):
if MUJOCO_IMPORT_ERROR is not None:
raise error.DependencyNotInstalled(
f"{MUJOCO_IMPORT_ERROR}. "
"(HINT: you need to install mujoco, run `pip install gymnasium[mujoco]`.)"
)
super().__init__(
model_path,
frame_skip,
observation_space,
render_mode,
width,
height,
camera_id,
camera_name,
)
from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer
self.mujoco_renderer = MujocoRenderer(
self.model, self.data, default_camera_config
)
def _initialize_simulation(
self,
):
model = mujoco.MjModel.from_xml_path(self.fullpath)
# MjrContext will copy model.vis.global_.off* to con.off*
model.vis.global_.offwidth = self.width
model.vis.global_.offheight = self.height
data = mujoco.MjData(model)
return model, data
def _reset_simulation(self):
mujoco.mj_resetData(self.model, self.data)
def set_state(self, qpos, qvel):
super().set_state(qpos, qvel)
self.data.qpos[:] = np.copy(qpos)
self.data.qvel[:] = np.copy(qvel)
if self.model.na == 0:
self.data.act[:] = None
mujoco.mj_forward(self.model, self.data)
def _step_mujoco_simulation(self, ctrl, n_frames):
self.data.ctrl[:] = ctrl
mujoco.mj_step(self.model, self.data, nstep=n_frames)
# As of MuJoCo 2.0, force-related quantities like cacc are not computed
# unless there's a force sensor in the model.
# See https://github.com/openai/gym/issues/1541
mujoco.mj_rnePostConstraint(self.model, self.data)
def render(self):
return self.mujoco_renderer.render(
self.render_mode, self.camera_id, self.camera_name
)
def close(self):
if self.mujoco_renderer is not None:
self.mujoco_renderer.close()
def get_body_com(self, body_name):
return self.data.body(body_name).xpos

View File

@ -0,0 +1,714 @@
import collections
import os
import time
from typing import Optional
import glfw
import imageio
import mujoco
import numpy as np
def _import_egl(width, height):
from mujoco.egl import GLContext
return GLContext(width, height)
def _import_glfw(width, height):
from mujoco.glfw import GLContext
return GLContext(width, height)
def _import_osmesa(width, height):
from mujoco.osmesa import GLContext
return GLContext(width, height)
_ALL_RENDERERS = collections.OrderedDict(
[
("glfw", _import_glfw),
("egl", _import_egl),
("osmesa", _import_osmesa),
]
)
class BaseRender:
def __init__(
self, model: "mujoco.MjModel", data: "mujoco.MjData", width: int, height: int
):
"""Render context superclass for offscreen and window rendering."""
self.model = model
self.data = data
self._markers = []
self._overlays = {}
self.viewport = mujoco.MjrRect(0, 0, width, height)
# This goes to specific visualizer
self.scn = mujoco.MjvScene(self.model, 1000)
self.cam = mujoco.MjvCamera()
self.vopt = mujoco.MjvOption()
self.pert = mujoco.MjvPerturb()
self.make_context_current()
# Keep in Mujoco Context
self.con = mujoco.MjrContext(self.model, mujoco.mjtFontScale.mjFONTSCALE_150)
self._set_mujoco_buffer()
def _set_mujoco_buffer(self):
raise NotImplementedError
def make_context_current(self):
raise NotImplementedError
def add_overlay(self, gridpos: int, text1: str, text2: str):
"""Overlays text on the scene."""
if gridpos not in self._overlays:
self._overlays[gridpos] = ["", ""]
self._overlays[gridpos][0] += text1 + "\n"
self._overlays[gridpos][1] += text2 + "\n"
def add_marker(self, **marker_params):
self._markers.append(marker_params)
def _add_marker_to_scene(self, marker: dict):
if self.scn.ngeom >= self.scn.maxgeom:
raise RuntimeError("Ran out of geoms. maxgeom: %d" % self.scn.maxgeom)
g = self.scn.geoms[self.scn.ngeom]
# default values.
g.dataid = -1
g.objtype = mujoco.mjtObj.mjOBJ_UNKNOWN
g.objid = -1
g.category = mujoco.mjtCatBit.mjCAT_DECOR
g.texid = -1
g.texuniform = 0
g.texrepeat[0] = 1
g.texrepeat[1] = 1
g.emission = 0
g.specular = 0.5
g.shininess = 0.5
g.reflectance = 0
g.type = mujoco.mjtGeom.mjGEOM_BOX
g.size[:] = np.ones(3) * 0.1
g.mat[:] = np.eye(3)
g.rgba[:] = np.ones(4)
for key, value in marker.items():
if isinstance(value, (int, float, mujoco._enums.mjtGeom)):
setattr(g, key, value)
elif isinstance(value, (tuple, list, np.ndarray)):
attr = getattr(g, key)
attr[:] = np.asarray(value).reshape(attr.shape)
elif isinstance(value, str):
assert key == "label", "Only label is a string in mjtGeom."
if value is None:
g.label[0] = 0
else:
g.label = value
elif hasattr(g, key):
raise ValueError(
"mjtGeom has attr {} but type {} is invalid".format(
key, type(value)
)
)
else:
raise ValueError("mjtGeom doesn't have field %s" % key)
self.scn.ngeom += 1
def close(self):
"""Override close in your rendering subclass to perform any necessary cleanup
after env.close() is called.
"""
raise NotImplementedError
class OffScreenViewer(BaseRender):
"""Offscreen rendering class with opengl context."""
def __init__(self, model: "mujoco.MjMujoco", data: "mujoco.MjData"):
width = model.vis.global_.offwidth
height = model.vis.global_.offheight
# We must make GLContext before MjrContext
self._get_opengl_backend(width, height)
super().__init__(model, data, width, height)
self._init_camera()
def _init_camera(self):
self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
self.cam.fixedcamid = -1
for i in range(3):
self.cam.lookat[i] = np.median(self.data.geom_xpos[:, i])
self.cam.distance = self.model.stat.extent
def _get_opengl_backend(self, width: int, height: int):
self.backend = os.environ.get("MUJOCO_GL")
if self.backend is not None:
try:
self.opengl_context = _ALL_RENDERERS[self.backend](width, height)
except KeyError as e:
raise RuntimeError(
"Environment variable {} must be one of {!r}: got {!r}.".format(
"MUJOCO_GL", _ALL_RENDERERS.keys(), self.backend
)
) from e
else:
for name, _ in _ALL_RENDERERS.items():
try:
self.opengl_context = _ALL_RENDERERS[name](width, height)
self.backend = name
break
except: # noqa:E722
pass
if self.backend is None:
raise RuntimeError(
"No OpenGL backend could be imported. Attempting to create a "
"rendering context will result in a RuntimeError."
)
def _set_mujoco_buffer(self):
mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_OFFSCREEN, self.con)
def make_context_current(self):
self.opengl_context.make_current()
def free(self):
self.opengl_context.free()
def __del__(self):
self.free()
def render(
self,
render_mode: str,
camera_id: Optional[int] = None,
segmentation: bool = False,
):
if camera_id is not None:
if camera_id == -1:
self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
else:
self.cam.type = mujoco.mjtCamera.mjCAMERA_FIXED
self.cam.fixedcamid = camera_id
mujoco.mjv_updateScene(
self.model,
self.data,
self.vopt,
self.pert,
self.cam,
mujoco.mjtCatBit.mjCAT_ALL,
self.scn,
)
if segmentation:
self.scn.flags[mujoco.mjtRndFlag.mjRND_SEGMENT] = 1
self.scn.flags[mujoco.mjtRndFlag.mjRND_IDCOLOR] = 1
for marker_params in self._markers:
self._add_marker_to_scene(marker_params)
mujoco.mjr_render(self.viewport, self.scn, self.con)
for gridpos, (text1, text2) in self._overlays.items():
mujoco.mjr_overlay(
mujoco.mjtFontScale.mjFONTSCALE_150,
gridpos,
self.viewport,
text1.encode(),
text2.encode(),
self.con,
)
if segmentation:
self.scn.flags[mujoco.mjtRndFlag.mjRND_SEGMENT] = 0
self.scn.flags[mujoco.mjtRndFlag.mjRND_IDCOLOR] = 0
rgb_arr = np.zeros(
3 * self.viewport.width * self.viewport.height, dtype=np.uint8
)
depth_arr = np.zeros(
self.viewport.width * self.viewport.height, dtype=np.float32
)
mujoco.mjr_readPixels(rgb_arr, depth_arr, self.viewport, self.con)
if render_mode == "depth_array":
depth_img = depth_arr.reshape(self.viewport.height, self.viewport.width)
# original image is upside-down, so flip it
return depth_img[::-1, :]
else:
rgb_img = rgb_arr.reshape(self.viewport.height, self.viewport.width, 3)
if segmentation:
seg_img = (
rgb_img[:, :, 0]
+ rgb_img[:, :, 1] * (2**8)
+ rgb_img[:, :, 2] * (2**16)
)
seg_img[seg_img >= (self.scn.ngeom + 1)] = 0
seg_ids = np.full(
(self.scn.ngeom + 1, 2), fill_value=-1, dtype=np.int32
)
for i in range(self.scn.ngeom):
geom = self.scn.geoms[i]
if geom.segid != -1:
seg_ids[geom.segid + 1, 0] = geom.objtype
seg_ids[geom.segid + 1, 1] = geom.objid
rgb_img = seg_ids[seg_img]
# original image is upside-down, so flip i
return rgb_img[::-1, :, :]
def close(self):
self.free()
glfw.terminate()
class WindowViewer(BaseRender):
"""Class for window rendering in all MuJoCo environments."""
def __init__(self, model: "mujoco.MjModel", data: "mujoco.MjData"):
glfw.init()
self._button_left_pressed = False
self._button_right_pressed = False
self._last_mouse_x = 0
self._last_mouse_y = 0
self._paused = False
self._transparent = False
self._contacts = False
self._render_every_frame = True
self._image_idx = 0
self._image_path = "/tmp/frame_%07d.png"
self._time_per_render = 1 / 60.0
self._run_speed = 1.0
self._loop_count = 0
self._advance_by_one_step = False
self._hide_menu = False
width, height = glfw.get_video_mode(glfw.get_primary_monitor()).size
glfw.window_hint(glfw.VISIBLE, 1)
self.window = glfw.create_window(width // 2, height // 2, "mujoco", None, None)
self.width, self.height = glfw.get_framebuffer_size(self.window)
window_width, _ = glfw.get_window_size(self.window)
self._scale = self.width * 1.0 / window_width
# set callbacks
glfw.set_cursor_pos_callback(self.window, self._cursor_pos_callback)
glfw.set_mouse_button_callback(self.window, self._mouse_button_callback)
glfw.set_scroll_callback(self.window, self._scroll_callback)
glfw.set_key_callback(self.window, self._key_callback)
super().__init__(model, data, width, height)
glfw.swap_interval(1)
def _set_mujoco_buffer(self):
mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_WINDOW, self.con)
def make_context_current(self):
glfw.make_context_current(self.window)
def free(self):
if self.window:
if glfw.get_current_context() == self.window:
glfw.make_context_current(None)
glfw.destroy_window(self.window)
self.window = None
def __del__(self):
"""Eliminate all of the OpenGL glfw contexts and windows"""
self.free()
def render(self):
"""
Renders the environment geometries in the OpenGL glfw window:
1. Create the overlay for the left side panel menu.
2. Update the geometries used for rendering based on the current state of the model - `mujoco.mjv_updateScene()`.
3. Add markers to scene, these are additional geometries to include in the model, i.e arrows, https://mujoco.readthedocs.io/en/latest/APIreference.html?highlight=arrow#mjtgeom.
These markers are added with the `add_marker()` method before rendering.
4. Render the 3D scene to the window context - `mujoco.mjr_render()`.
5. Render overlays in the window context - `mujoco.mjr_overlay()`.
6. Swap front and back buffer, https://www.glfw.org/docs/3.3/quick.html.
7. Poll events like mouse clicks or keyboard input.
"""
# mjv_updateScene, mjr_render, mjr_overlay
def update():
# fill overlay items
self._create_overlay()
render_start = time.time()
if self.window is None:
return
elif glfw.window_should_close(self.window):
glfw.destroy_window(self.window)
glfw.terminate()
self.viewport.width, self.viewport.height = glfw.get_framebuffer_size(
self.window
)
# update scene
mujoco.mjv_updateScene(
self.model,
self.data,
self.vopt,
mujoco.MjvPerturb(),
self.cam,
mujoco.mjtCatBit.mjCAT_ALL.value,
self.scn,
)
# marker items
for marker in self._markers:
self._add_marker_to_scene(marker)
# render
mujoco.mjr_render(self.viewport, self.scn, self.con)
# overlay items
if not self._hide_menu:
for gridpos, [t1, t2] in self._overlays.items():
mujoco.mjr_overlay(
mujoco.mjtFontScale.mjFONTSCALE_150,
gridpos,
self.viewport,
t1,
t2,
self.con,
)
glfw.swap_buffers(self.window)
glfw.poll_events()
self._time_per_render = 0.9 * self._time_per_render + 0.1 * (
time.time() - render_start
)
if self._paused:
while self._paused:
update()
if self._advance_by_one_step:
self._advance_by_one_step = False
break
else:
self._loop_count += self.model.opt.timestep / (
self._time_per_render * self._run_speed
)
if self._render_every_frame:
self._loop_count = 1
while self._loop_count > 0:
update()
self._loop_count -= 1
# clear overlay
self._overlays.clear()
# clear markers
self._markers.clear()
def close(self):
self.free()
glfw.terminate()
def _key_callback(self, window, key: int, scancode, action: int, mods):
if action != glfw.RELEASE:
return
# Switch cameras
elif key == glfw.KEY_TAB:
self.cam.fixedcamid += 1
self.cam.type = mujoco.mjtCamera.mjCAMERA_FIXED
if self.cam.fixedcamid >= self.model.ncam:
self.cam.fixedcamid = -1
self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
# Pause simulation
elif key == glfw.KEY_SPACE and self._paused is not None:
self._paused = not self._paused
# Advances simulation by one step.
elif key == glfw.KEY_RIGHT and self._paused is not None:
self._advance_by_one_step = True
self._paused = True
# Slows down simulation
elif key == glfw.KEY_S:
self._run_speed /= 2.0
# Speeds up simulation
elif key == glfw.KEY_F:
self._run_speed *= 2.0
# Turn off / turn on rendering every frame.
elif key == glfw.KEY_D:
self._render_every_frame = not self._render_every_frame
# Capture screenshot
elif key == glfw.KEY_T:
img = np.zeros(
(
glfw.get_framebuffer_size(self.window)[1],
glfw.get_framebuffer_size(self.window)[0],
3,
),
dtype=np.uint8,
)
mujoco.mjr_readPixels(img, None, self.viewport, self.con)
imageio.imwrite(self._image_path % self._image_idx, np.flipud(img))
self._image_idx += 1
# Display contact forces
elif key == glfw.KEY_C:
self._contacts = not self._contacts
self.vopt.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = self._contacts
self.vopt.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = self._contacts
# Display coordinate frames
elif key == glfw.KEY_E:
self.vopt.frame = 1 - self.vopt.frame
# Hide overlay menu
elif key == glfw.KEY_H:
self._hide_menu = not self._hide_menu
# Make transparent
elif key == glfw.KEY_R:
self._transparent = not self._transparent
if self._transparent:
self.model.geom_rgba[:, 3] /= 5.0
else:
self.model.geom_rgba[:, 3] *= 5.0
# Geom group visibility
elif key in (glfw.KEY_0, glfw.KEY_1, glfw.KEY_2, glfw.KEY_3, glfw.KEY_4):
self.vopt.geomgroup[key - glfw.KEY_0] ^= 1
# Quit
if key == glfw.KEY_ESCAPE:
print("Pressed ESC")
print("Quitting.")
glfw.destroy_window(self.window)
glfw.terminate()
def _cursor_pos_callback(
self, window: "glfw.LP__GLFWwindow", xpos: float, ypos: float
):
if not (self._button_left_pressed or self._button_right_pressed):
return
mod_shift = (
glfw.get_key(window, glfw.KEY_LEFT_SHIFT) == glfw.PRESS
or glfw.get_key(window, glfw.KEY_RIGHT_SHIFT) == glfw.PRESS
)
if self._button_right_pressed:
action = (
mujoco.mjtMouse.mjMOUSE_MOVE_H
if mod_shift
else mujoco.mjtMouse.mjMOUSE_MOVE_V
)
elif self._button_left_pressed:
action = (
mujoco.mjtMouse.mjMOUSE_ROTATE_H
if mod_shift
else mujoco.mjtMouse.mjMOUSE_ROTATE_V
)
else:
action = mujoco.mjtMouse.mjMOUSE_ZOOM
dx = int(self._scale * xpos) - self._last_mouse_x
dy = int(self._scale * ypos) - self._last_mouse_y
width, height = glfw.get_framebuffer_size(window)
mujoco.mjv_moveCamera(
self.model, action, dx / height, dy / height, self.scn, self.cam
)
self._last_mouse_x = int(self._scale * xpos)
self._last_mouse_y = int(self._scale * ypos)
def _mouse_button_callback(self, window: "glfw.LP__GLFWwindow", button, act, mods):
self._button_left_pressed = (
glfw.get_mouse_button(window, glfw.MOUSE_BUTTON_LEFT) == glfw.PRESS
)
self._button_right_pressed = (
glfw.get_mouse_button(window, glfw.MOUSE_BUTTON_RIGHT) == glfw.PRESS
)
x, y = glfw.get_cursor_pos(window)
self._last_mouse_x = int(self._scale * x)
self._last_mouse_y = int(self._scale * y)
def _scroll_callback(self, window, x_offset, y_offset: float):
mujoco.mjv_moveCamera(
self.model,
mujoco.mjtMouse.mjMOUSE_ZOOM,
0,
-0.05 * y_offset,
self.scn,
self.cam,
)
def _create_overlay(self):
topleft = mujoco.mjtGridPos.mjGRID_TOPLEFT
bottomleft = mujoco.mjtGridPos.mjGRID_BOTTOMLEFT
if self._render_every_frame:
self.add_overlay(topleft, "", "")
else:
self.add_overlay(
topleft,
"Run speed = %.3f x real time" % self._run_speed,
"[S]lower, [F]aster",
)
self.add_overlay(
topleft, "Ren[d]er every frame", "On" if self._render_every_frame else "Off"
)
self.add_overlay(
topleft,
"Switch camera (#cams = %d)" % (self.model.ncam + 1),
"[Tab] (camera ID = %d)" % self.cam.fixedcamid,
)
self.add_overlay(topleft, "[C]ontact forces", "On" if self._contacts else "Off")
self.add_overlay(topleft, "T[r]ansparent", "On" if self._transparent else "Off")
if self._paused is not None:
if not self._paused:
self.add_overlay(topleft, "Stop", "[Space]")
else:
self.add_overlay(topleft, "Start", "[Space]")
self.add_overlay(
topleft, "Advance simulation by one step", "[right arrow]"
)
self.add_overlay(
topleft, "Referenc[e] frames", "On" if self.vopt.frame == 1 else "Off"
)
self.add_overlay(topleft, "[H]ide Menu", "")
if self._image_idx > 0:
fname = self._image_path % (self._image_idx - 1)
self.add_overlay(topleft, "Cap[t]ure frame", "Saved as %s" % fname)
else:
self.add_overlay(topleft, "Cap[t]ure frame", "")
self.add_overlay(topleft, "Toggle geomgroup visibility", "0-4")
self.add_overlay(bottomleft, "FPS", "%d%s" % (1 / self._time_per_render, ""))
self.add_overlay(
bottomleft, "Solver iterations", str(self.data.solver_iter + 1)
)
self.add_overlay(
bottomleft, "Step", str(round(self.data.time / self.model.opt.timestep))
)
self.add_overlay(bottomleft, "timestep", "%.5f" % self.model.opt.timestep)
class MujocoRenderer:
"""This is the MuJoCo renderer manager class for every MuJoCo environment.
The class has two main public methods available:
- :meth:`render` - Renders the environment in three possible modes: "human", "rgb_array", or "depth_array"
- :meth:`close` - Closes all contexts initialized with the renderer
"""
def __init__(
self,
model: "mujoco.MjModel",
data: "mujoco.MjData",
default_cam_config: Optional[dict] = None,
):
"""A wrapper for clipping continuous actions within the valid bound.
Args:
model: MjModel data structure of the MuJoCo simulation
data: MjData data structure of the MuJoCo simulation
default_cam_config: dictionary with attribute values of the viewer's default camera, https://mujoco.readthedocs.io/en/latest/XMLreference.html?highlight=camera#visual-global
"""
self.model = model
self.data = data
self._viewers = {}
self.viewer = None
self.default_cam_config = default_cam_config
def render(
self,
render_mode: str,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
):
"""Renders a frame of the simulation in a specific format and camera view.
Args:
render_mode: The format to render the frame, it can be: "human", "rgb_array", or "depth_array"
camera_id: The integer camera id from which to render the frame in the MuJoCo simulation
camera_name: The string name of the camera from which to render the frame in the MuJoCo simulation. This argument should not be passed if using cameara_id instead and vice versa
Returns:
If render_mode is "rgb_array" or "depth_arra" it returns a numpy array in the specified format. "human" render mode does not return anything.
"""
viewer = self._get_viewer(render_mode=render_mode)
if render_mode in {
"rgb_array",
"depth_array",
}:
if camera_id is not None and camera_name is not None:
raise ValueError(
"Both `camera_id` and `camera_name` cannot be"
" specified at the same time."
)
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
camera_name = "track"
if camera_id is None:
camera_id = mujoco.mj_name2id(
self.model,
mujoco.mjtObj.mjOBJ_CAMERA,
camera_name,
)
img = viewer.render(render_mode=render_mode, camera_id=camera_id)
return img
elif render_mode == "human":
return viewer.render()
def _get_viewer(self, render_mode: str):
"""Initializes and returns a viewer class depending on the render_mode
- `WindowViewer` class for "human" render mode
- `OffScreenViewer` class for "rgb_array" or "depth_array" render mode
"""
self.viewer = self._viewers.get(render_mode)
if self.viewer is None:
if render_mode == "human":
self.viewer = WindowViewer(self.model, self.data)
elif render_mode in {"rgb_array", "depth_array"}:
self.viewer = OffScreenViewer(self.model, self.data)
else:
raise AttributeError(
f"Unexpected mode: {render_mode}, expected modes: human, rgb_array, or depth_array"
)
# Add default camera parameters
self._set_cam_config()
self._viewers[render_mode] = self.viewer
if len(self._viewers.keys()) > 1:
# Only one context can be current at a time
self.viewer.make_context_current()
return self.viewer
def _set_cam_config(self):
"""Set the default camera parameters"""
assert self.viewer is not None
if self.default_cam_config is not None:
for key, value in self.default_cam_config.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)
def close(self):
"""Close the OpenGL rendering contexts of all viewer modes"""
for _, viewer in self._viewers.items():
viewer.close()

View File

@ -0,0 +1,84 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box
class PusherEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "pusher.xml", 5, observation_space=observation_space, **kwargs
)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
reward_near = -np.linalg.norm(vec_1)
reward_dist = -np.linalg.norm(vec_2)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
return (
ob,
reward,
False,
False,
dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
)
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = -1
self.viewer.cam.distance = 4.0
def reset_model(self):
qpos = self.init_qpos
self.goal_pos = np.asarray([0, 0])
while True:
self.cylinder_pos = np.concatenate(
[
self.np_random.uniform(low=-0.3, high=0, size=1),
self.np_random.uniform(low=-0.2, high=0.2, size=1),
]
)
if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
break
qpos[-4:-2] = self.cylinder_pos
qpos[-2:] = self.goal_pos
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-4:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
]
)

Some files were not shown because too many files have changed in this diff Show More