I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
from stable_baselines3.common.envs.bit_flipping_env import BitFlippingEnv
from stable_baselines3.common.envs.identity_env import (
FakeImageEnv,
IdentityEnv,
IdentityEnvBox,
IdentityEnvMultiBinary,
IdentityEnvMultiDiscrete,
)
from stable_baselines3.common.envs.multi_input_envs import SimpleMultiObsEnv
__all__ = [
"BitFlippingEnv",
"FakeImageEnv",
"IdentityEnv",
"IdentityEnvBox",
"IdentityEnvMultiBinary",
"IdentityEnvMultiDiscrete",
"SimpleMultiObsEnv",
"SimpleMultiObsEnv",
]
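These re-exports form the package's public test-env API. As a hedged illustration (assuming the package is importable as stable_baselines3), the environments defined in the files below can be used directly:

from stable_baselines3.common.envs import BitFlippingEnv, IdentityEnvBox, SimpleMultiObsEnv

# Each env follows the Gymnasium API: reset() -> (obs, info), step() -> (obs, reward, terminated, truncated, info)
print(BitFlippingEnv(n_bits=4).observation_space)  # Dict with observation/achieved_goal/desired_goal keys
print(IdentityEnvBox().action_space)               # Box(-1.0, 1.0, (1,), float32)
print(SimpleMultiObsEnv().observation_space)       # Dict with 'vec' (Box) and 'img' (Box) keys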

View File

@@ -0,0 +1,235 @@
from collections import OrderedDict
from typing import Any, Dict, Optional, Tuple, Union
import numpy as np
from gymnasium import Env, spaces
from gymnasium.envs.registration import EnvSpec
from stable_baselines3.common.type_aliases import GymStepReturn
class BitFlippingEnv(Env):
"""
Simple bit flipping env, useful to test HER.
The goal is to flip all the bits to get a vector of ones.
In the continuous variant, if the ith action component has a value > 0,
then the ith bit will be flipped. Uses a ``MultiBinary`` observation space
by default.
:param n_bits: Number of bits to flip
:param continuous: Whether to use the continuous action version;
by default, the discrete one is used
:param max_steps: Max number of steps; by default, equal to n_bits
:param discrete_obs_space: Whether to use the discrete observation
version, i.e. the state encoded as a single integer index
:param image_obs_space: Whether to use an image observation version,
i.e. a greyscale image encoding of the state
:param channel_first: Whether to use channel-first or channel-last images.
"""
spec = EnvSpec("BitFlippingEnv-v0", "no-entry-point")
state: np.ndarray
def __init__(
self,
n_bits: int = 10,
continuous: bool = False,
max_steps: Optional[int] = None,
discrete_obs_space: bool = False,
image_obs_space: bool = False,
channel_first: bool = True,
render_mode: str = "human",
):
super().__init__()
self.render_mode = render_mode
# Shape of the observation when using image space
self.image_shape = (1, 36, 36) if channel_first else (36, 36, 1)
# The achieved goal is determined by the current state
# here, it is a special case where they are equal
# observation space for observations given to the model
self.observation_space = self._make_observation_space(discrete_obs_space, image_obs_space, n_bits)
# observation space used to update internal state
self._obs_space = spaces.MultiBinary(n_bits)
if continuous:
self.action_space = spaces.Box(-1, 1, shape=(n_bits,), dtype=np.float32)
else:
self.action_space = spaces.Discrete(n_bits)
self.continuous = continuous
self.discrete_obs_space = discrete_obs_space
self.image_obs_space = image_obs_space
self.desired_goal = np.ones((n_bits,), dtype=self.observation_space["desired_goal"].dtype)
if max_steps is None:
max_steps = n_bits
self.max_steps = max_steps
self.current_step = 0
def seed(self, seed: int) -> None:
self._obs_space.seed(seed)
def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]:
"""
Convert to discrete space if needed.
:param state: The state to convert.
:return: The state in the observation format in use (integer index, image, or bit vector).
"""
if self.discrete_obs_space:
# The internal state is the binary representation of the
# observed one
return int(sum(state[i] * 2**i for i in range(len(state))))
if self.image_obs_space:
size = np.prod(self.image_shape)
image = np.concatenate((state * 255, np.zeros(size - len(state), dtype=np.uint8)))
return image.reshape(self.image_shape).astype(np.uint8)
return state
def convert_to_bit_vector(self, state: Union[int, np.ndarray], batch_size: int) -> np.ndarray:
"""
Convert to bit vector if needed.
:param state: The state to be converted, which can be either an integer or a numpy array.
:param batch_size: The batch size.
:return: The state converted into a bit vector.
"""
# Convert back to bit vector
if isinstance(state, int):
bit_vector = np.array(state).reshape(batch_size, -1)
# Convert to binary representation
bit_vector = ((bit_vector[:, :] & (1 << np.arange(len(self.state)))) > 0).astype(int)
elif self.image_obs_space:
bit_vector = state.reshape(batch_size, -1)[:, : len(self.state)] / 255
else:
bit_vector = np.array(state).reshape(batch_size, -1)
return bit_vector
def _make_observation_space(self, discrete_obs_space: bool, image_obs_space: bool, n_bits: int) -> spaces.Dict:
"""
Helper to create observation space
:param discrete_obs_space: Whether to use the discrete observation version
:param image_obs_space: Whether to use the image observation version
:param n_bits: The number of bits used to represent the state
:return: the environment observation space
"""
if discrete_obs_space and image_obs_space:
raise ValueError("Cannot use both discrete and image observation spaces")
if discrete_obs_space:
# In the discrete case, the agent acts on the binary
# representation of the observation
return spaces.Dict(
{
"observation": spaces.Discrete(2**n_bits),
"achieved_goal": spaces.Discrete(2**n_bits),
"desired_goal": spaces.Discrete(2**n_bits),
}
)
if image_obs_space:
# When using image as input,
# one image contains the bits 0 -> 0, 1 -> 255
# and the rest is filled with zeros
return spaces.Dict(
{
"observation": spaces.Box(
low=0,
high=255,
shape=self.image_shape,
dtype=np.uint8,
),
"achieved_goal": spaces.Box(
low=0,
high=255,
shape=self.image_shape,
dtype=np.uint8,
),
"desired_goal": spaces.Box(
low=0,
high=255,
shape=self.image_shape,
dtype=np.uint8,
),
}
)
return spaces.Dict(
{
"observation": spaces.MultiBinary(n_bits),
"achieved_goal": spaces.MultiBinary(n_bits),
"desired_goal": spaces.MultiBinary(n_bits),
}
)
def _get_obs(self) -> Dict[str, Union[int, np.ndarray]]:
"""
Helper to create the observation.
:return: The current observation.
"""
return OrderedDict(
[
("observation", self.convert_if_needed(self.state.copy())),
("achieved_goal", self.convert_if_needed(self.state.copy())),
("desired_goal", self.convert_if_needed(self.desired_goal.copy())),
]
)
def reset(
self, *, seed: Optional[int] = None, options: Optional[Dict] = None
) -> Tuple[Dict[str, Union[int, np.ndarray]], Dict]:
if seed is not None:
self._obs_space.seed(seed)
self.current_step = 0
self.state = self._obs_space.sample()
return self._get_obs(), {}
def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
"""
Step into the env.
:param action: The bit index to flip (discrete case) or a continuous action vector.
:return: The Gymnasium step tuple (observation, reward, terminated, truncated, info).
"""
if self.continuous:
self.state[action > 0] = 1 - self.state[action > 0]
else:
self.state[action] = 1 - self.state[action]
obs = self._get_obs()
reward = float(self.compute_reward(obs["achieved_goal"], obs["desired_goal"], None).item())
terminated = reward == 0
self.current_step += 1
# The episode terminates when we reach the goal or the max number of steps
info = {"is_success": terminated}
truncated = self.current_step >= self.max_steps
return obs, reward, terminated, truncated, info
def compute_reward(
self, achieved_goal: Union[int, np.ndarray], desired_goal: Union[int, np.ndarray], _info: Optional[Dict[str, Any]]
) -> np.float32:
# As we are using a vectorized version, we need to keep track of the `batch_size`
if isinstance(achieved_goal, int):
batch_size = 1
elif self.image_obs_space:
batch_size = achieved_goal.shape[0] if len(achieved_goal.shape) > 3 else 1
else:
batch_size = achieved_goal.shape[0] if len(achieved_goal.shape) > 1 else 1
desired_goal = self.convert_to_bit_vector(desired_goal, batch_size)
achieved_goal = self.convert_to_bit_vector(achieved_goal, batch_size)
# Deceptive (sparse) reward: it is 0 only when the goal is achieved, -1 otherwise
# Here we are using a vectorized version
distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
return -(distance > 0).astype(np.float32)
def render(self) -> Optional[np.ndarray]: # type: ignore[override]
if self.render_mode == "rgb_array":
return self.state.copy()
print(self.state)
return None
def close(self) -> None:
pass
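A minimal, hedged sketch of driving BitFlippingEnv with random actions (a plain Gymnasium loop, not something defined in this file):

from stable_baselines3.common.envs import BitFlippingEnv

env = BitFlippingEnv(n_bits=4)
obs, _ = env.reset(seed=0)
terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    # reward is 0.0 only when all bits are 1 (goal reached), -1.0 otherwise
print(info["is_success"], obs["achieved_goal"], obs["desired_goal"])

Because the reward is sparse and the observation exposes achieved_goal/desired_goal, this env is typically paired with hindsight experience replay (e.g. SB3's HerReplayBuffer) during training, as the docstring suggests.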

View File

@@ -0,0 +1,159 @@
from typing import Any, Dict, Generic, Optional, Tuple, TypeVar, Union
import gymnasium as gym
import numpy as np
from gymnasium import spaces
from stable_baselines3.common.type_aliases import GymStepReturn
T = TypeVar("T", int, np.ndarray)
class IdentityEnv(gym.Env, Generic[T]):
def __init__(self, dim: Optional[int] = None, space: Optional[spaces.Space] = None, ep_length: int = 100):
"""
Identity environment for testing purposes
:param dim: the size of the action and observation dimension you want
to learn. Provide at most one of ``dim`` and ``space``. If both are
None, ``dim`` defaults to 1 and a ``Discrete(1)`` space is used.
:param space: the action and observation space. Provide at most one of
``dim`` and ``space``.
:param ep_length: the length of each episode in timesteps
"""
if space is None:
if dim is None:
dim = 1
space = spaces.Discrete(dim)
else:
assert dim is None, "arguments for both 'dim' and 'space' provided: at most one allowed"
self.action_space = self.observation_space = space
self.ep_length = ep_length
self.current_step = 0
self.num_resets = -1 # Becomes 0 after __init__ exits.
self.reset()
def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[T, Dict]:
if seed is not None:
super().reset(seed=seed)
self.current_step = 0
self.num_resets += 1
self._choose_next_state()
return self.state, {}
def step(self, action: T) -> Tuple[T, float, bool, bool, Dict[str, Any]]:
reward = self._get_reward(action)
self._choose_next_state()
self.current_step += 1
terminated = False
truncated = self.current_step >= self.ep_length
return self.state, reward, terminated, truncated, {}
def _choose_next_state(self) -> None:
self.state = self.action_space.sample()
def _get_reward(self, action: T) -> float:
return 1.0 if np.all(self.state == action) else 0.0
def render(self, mode: str = "human") -> None:
pass
class IdentityEnvBox(IdentityEnv[np.ndarray]):
def __init__(self, low: float = -1.0, high: float = 1.0, eps: float = 0.05, ep_length: int = 100):
"""
Identity environment for testing purposes
:param low: the lower bound of the box dim
:param high: the upper bound of the box dim
:param eps: the epsilon bound for correct value
:param ep_length: the length of each episode in timesteps
"""
space = spaces.Box(low=low, high=high, shape=(1,), dtype=np.float32)
super().__init__(ep_length=ep_length, space=space)
self.eps = eps
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
reward = self._get_reward(action)
self._choose_next_state()
self.current_step += 1
terminated = False
truncated = self.current_step >= self.ep_length
return self.state, reward, terminated, truncated, {}
def _get_reward(self, action: np.ndarray) -> float:
return 1.0 if (self.state - self.eps) <= action <= (self.state + self.eps) else 0.0
class IdentityEnvMultiDiscrete(IdentityEnv[np.ndarray]):
def __init__(self, dim: int = 1, ep_length: int = 100) -> None:
"""
Identity environment for testing purposes
:param dim: the size of the dimensions you want to learn
:param ep_length: the length of each episode in timesteps
"""
space = spaces.MultiDiscrete([dim, dim])
super().__init__(ep_length=ep_length, space=space)
class IdentityEnvMultiBinary(IdentityEnv[np.ndarray]):
def __init__(self, dim: int = 1, ep_length: int = 100) -> None:
"""
Identity environment for testing purposes
:param dim: the size of the dimensions you want to learn
:param ep_length: the length of each episode in timesteps
"""
space = spaces.MultiBinary(dim)
super().__init__(ep_length=ep_length, space=space)
class FakeImageEnv(gym.Env):
"""
Fake image environment for testing purposes, it mimics Atari games.
:param action_dim: Number of discrete actions
:param screen_height: Height of the image
:param screen_width: Width of the image
:param n_channels: Number of color channels
:param discrete: Create discrete action space instead of continuous
:param channel_first: Put channels on first axis instead of last
"""
def __init__(
self,
action_dim: int = 6,
screen_height: int = 84,
screen_width: int = 84,
n_channels: int = 1,
discrete: bool = True,
channel_first: bool = False,
) -> None:
self.observation_shape = (screen_height, screen_width, n_channels)
if channel_first:
self.observation_shape = (n_channels, screen_height, screen_width)
self.observation_space = spaces.Box(low=0, high=255, shape=self.observation_shape, dtype=np.uint8)
if discrete:
self.action_space = spaces.Discrete(action_dim)
else:
self.action_space = spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)
self.ep_length = 10
self.current_step = 0
def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]:
if seed is not None:
super().reset(seed=seed)
self.current_step = 0
return self.observation_space.sample(), {}
def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
reward = 0.0
self.current_step += 1
terminated = False
truncated = self.current_step >= self.ep_length
return self.observation_space.sample(), reward, terminated, truncated, {}
def render(self, mode: str = "human") -> None:
pass
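As a hedged illustration of the identity tasks above: the optimal policy simply echoes the observation back as the action, which is what makes these envs convenient sanity checks for new algorithms.

from stable_baselines3.common.envs import IdentityEnvBox

env = IdentityEnvBox(eps=0.05, ep_length=10)
obs, _ = env.reset(seed=0)
# Echoing the current observation lands within eps of the state, so the reward is 1.0
obs, reward, terminated, truncated, _ = env.step(obs)
assert reward == 1.0 and not terminated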

View File

@@ -0,0 +1,183 @@
from typing import Dict, List, Optional, Tuple, Union
import gymnasium as gym
import numpy as np
from gymnasium import spaces
from stable_baselines3.common.type_aliases import GymStepReturn
class SimpleMultiObsEnv(gym.Env):
"""
Base class for GridWorld-based MultiObs environments: a 4x4 grid world.
.. code-block:: text
____________
| 0 1 2 3|
| 4|¯5¯¯6¯| 7|
| 8|_9_10_|11|
|12 13 14 15|
¯¯¯¯¯¯¯¯¯¯¯¯¯¯
start is 0
states 5, 6, 9, and 10 are blocked
goal is 15
actions are [left, down, right, up]
a simple env with a single integer state over the 16 grid cells (12 traversable), encoded with a vector and an image observation:
each column is represented by a random vector and each row is
represented by a random image, both sampled once at creation time.
:param num_col: Number of columns in the grid
:param num_row: Number of rows in the grid
:param random_start: If true, the agent starts in a random position
:param discrete_actions: If true, use a ``Discrete(4)`` action space; else a 4-dim ``Box`` whose argmax selects the action
:param channel_last: If true, the image will be channel-last, else channel-first
"""
def __init__(
self,
num_col: int = 4,
num_row: int = 4,
random_start: bool = True,
discrete_actions: bool = True,
channel_last: bool = True,
):
super().__init__()
self.vector_size = 5
if channel_last:
self.img_size = [64, 64, 1]
else:
self.img_size = [1, 64, 64]
self.random_start = random_start
self.discrete_actions = discrete_actions
if discrete_actions:
self.action_space = spaces.Discrete(4)
else:
self.action_space = spaces.Box(0, 1, (4,))
self.observation_space = spaces.Dict(
spaces={
"vec": spaces.Box(0, 1, (self.vector_size,), dtype=np.float64),
"img": spaces.Box(0, 255, self.img_size, dtype=np.uint8),
}
)
self.count = 0
# Timeout
self.max_count = 100
self.log = ""
self.state = 0
self.action2str = ["left", "down", "right", "up"]
self.init_possible_transitions()
self.num_col = num_col
self.state_mapping: List[Dict[str, np.ndarray]] = []
self.init_state_mapping(num_col, num_row)
self.max_state = len(self.state_mapping) - 1
def init_state_mapping(self, num_col: int, num_row: int) -> None:
"""
Initializes the state_mapping array which holds the observation values for each state
:param num_col: Number of columns.
:param num_row: Number of rows.
"""
# Each column is represented by a random vector
col_vecs = np.random.random((num_col, self.vector_size))
# Each row is represented by a random image
row_imgs = np.random.randint(0, 255, (num_row, 64, 64), dtype=np.uint8)
for i in range(num_col):
for j in range(num_row):
self.state_mapping.append({"vec": col_vecs[i], "img": row_imgs[j].reshape(self.img_size)})
def get_state_mapping(self) -> Dict[str, np.ndarray]:
"""
Uses the state to get the observation mapping.
:return: observation dict {'vec': ..., 'img': ...}
"""
return self.state_mapping[self.state]
def init_possible_transitions(self) -> None:
"""
Initializes the transitions of the environment
The environment exploits the cardinal directions of the grid by noting that
they correspond to simple addition and subtraction from the cell id within the grid
- up => means moving up a row => means subtracting the length of a column
- down => means moving down a row => means adding the length of a column
- left => means moving left by one => means subtracting 1
- right => means moving right by one => means adding 1
Thus one only needs to specify in which states each action is possible
in order to define the transitions of the environment
"""
self.left_possible = [1, 2, 3, 13, 14, 15]
self.down_possible = [0, 4, 8, 3, 7, 11]
self.right_possible = [0, 1, 2, 12, 13, 14]
self.up_possible = [4, 8, 12, 7, 11, 15]
def step(self, action: Union[int, np.ndarray]) -> GymStepReturn:
"""
Run one timestep of the environment's dynamics. When end of
episode is reached, you are responsible for calling `reset()`
to reset this environment's state.
Accepts an action and returns a tuple (observation, reward, terminated, truncated, info).
:param action: The action index (or, for continuous actions, a 4-dim array whose argmax is used)
:return: tuple (observation, reward, terminated, truncated, info).
"""
if not self.discrete_actions:
action = np.argmax(action) # type: ignore[assignment]
self.count += 1
prev_state = self.state
reward = -0.1
# define state transition
if self.state in self.left_possible and action == 0: # left
self.state -= 1
elif self.state in self.down_possible and action == 1: # down
self.state += self.num_col
elif self.state in self.right_possible and action == 2: # right
self.state += 1
elif self.state in self.up_possible and action == 3: # up
self.state -= self.num_col
got_to_end = self.state == self.max_state
reward = 1.0 if got_to_end else reward
truncated = self.count > self.max_count
terminated = got_to_end
self.log = f"Went {self.action2str[action]} in state {prev_state}, got to state {self.state}"
return self.get_state_mapping(), reward, terminated, truncated, {"got_to_end": got_to_end}
def render(self, mode: str = "human") -> None:
"""
Prints the log of the environment.
:param mode: Unused; kept for API compatibility.
"""
print(self.log)
def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[Dict[str, np.ndarray], Dict]:
"""
Resets the environment state and step count and returns reset observation.
:param seed: Optional seed for the random number generator.
:return: observation dict {'vec': ..., 'img': ...}
"""
if seed is not None:
super().reset(seed=seed)
self.count = 0
if not self.random_start:
self.state = 0
else:
self.state = np.random.randint(0, self.max_state)
return self.state_mapping[self.state], {}
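A short, hedged sketch of stepping the grid world above and inspecting its dict observation (action indices follow the [left, down, right, up] convention from the docstring):

from stable_baselines3.common.envs import SimpleMultiObsEnv

env = SimpleMultiObsEnv(random_start=False)  # deterministic start in state 0
obs, _ = env.reset(seed=0)
print(obs["vec"].shape, obs["img"].shape)  # (5,) and (64, 64, 1) with the default channel_last=True
obs, reward, terminated, truncated, info = env.step(1)  # action 1 = "down": state 0 -> state 4
env.render()  # prints "Went down in state 0, got to state 4"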