import datetime
import json
import os
import shutil
import tempfile
import zipfile
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, Union

import gymnasium as gym
import numpy as np
import stable_baselines3
from huggingface_hub import HfApi, upload_folder
from huggingface_hub.repocard import metadata_eval_result, metadata_save
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import (
    DummyVecEnv,
    VecEnv,
    VecVideoRecorder,
    unwrap_vec_normalize,
)
from wasabi import Printer

msg = Printer()


def _generate_config(model: str, local_path: Path) -> None:
    """
    Generate a config.json file containing information
    about the agent and the environment.
    :param model: name of the model zip file
    :param local_path: path of the local directory
    """
    unzipped_model_folder = model

    # Check if the user forgot to include the file extension
    if not model.endswith(".zip"):
        model += ".zip"

    # Step 1: Unzip the model
    with zipfile.ZipFile(local_path / model, "r") as zip_ref:
        zip_ref.extractall(local_path / unzipped_model_folder)

    # Step 2: Get data (JSON containing infos) and read it
    with open(local_path / unzipped_model_folder / "data") as json_file:
        data = json.load(json_file)
        # Add system_info elements to our JSON
        data["system_info"] = stable_baselines3.get_system_info(print_info=False)[0]

    # Step 3: Write our config.json file
    with open(local_path / "config.json", "w") as outfile:
        json.dump(data, outfile)


def _evaluate_agent(
    model: BaseAlgorithm,
    eval_env: VecEnv,
    n_eval_episodes: int,
    is_deterministic: bool,
    local_path: Path,
) -> Tuple[float, float]:
    """
    Evaluate the agent using the SB3 evaluate_policy method
    and create a results.json

    :param model: trained model object
    :param eval_env: environment used to evaluate the agent
    :param n_eval_episodes: number of evaluation episodes
    :param is_deterministic: use deterministic or stochastic actions
    :param local_path: path of the local repository
    :return: mean reward and standard deviation of the reward
    """
    # Step 1: Evaluate the agent
    mean_reward, std_reward = evaluate_policy(
        model, eval_env, n_eval_episodes=n_eval_episodes, deterministic=is_deterministic
    )

    # Step 2: Create the JSON evaluation payload
    # First get the datetime
    eval_datetime = datetime.datetime.now()
    eval_form_datetime = eval_datetime.isoformat()

    evaluate_data = {
        "mean_reward": mean_reward,
        "std_reward": std_reward,
        "is_deterministic": is_deterministic,
        "n_eval_episodes": n_eval_episodes,
        "eval_datetime": eval_form_datetime,
    }

    # Step 3: Write the JSON file
    with open(local_path / "results.json", "w") as outfile:
        json.dump(evaluate_data, outfile)

    return mean_reward, std_reward
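
# For reference, the results.json written by _evaluate_agent has this shape
# (a sketch with illustrative values, not real results):
#
#     {"mean_reward": 200.0, "std_reward": 0.0, "is_deterministic": true,
#      "n_eval_episodes": 10, "eval_datetime": "2024-01-01T12:00:00"}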


def entry_point(env_id: str) -> str:
    """
    Return the entry point of a registered environment,
    falling back to the legacy gym registry when the id
    is not registered with gymnasium.
    :param env_id: name of the environment
    """
    try:
        return str(gym.envs.registry[env_id].entry_point)
    except KeyError:
        # Fall back to the registry of the legacy gym package
        import gym as gym26

        return str(gym26.envs.registry[env_id].entry_point)


def is_atari(env_id: str) -> bool:
    """
    Check if the environment is an Atari one
    (taken from RL-Baselines3-Zoo)
    :param env_id: name of the environment
    """
    return "AtariEnv" in entry_point(env_id)
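
# For example, assuming ale-py's Atari environments are registered
# (illustrative ids, not defined in this module):
#
#     is_atari("BreakoutNoFrameskip-v4")  # True: "AtariEnv" is in its entry point
#     is_atari("CartPole-v1")             # False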


def _generate_replay(
    model: BaseAlgorithm,
    eval_env: VecEnv,
    video_length: int,
    is_deterministic: bool,
    local_path: Path,
) -> None:
    """
    Generate a replay video of the agent
    :param model: trained model
    :param eval_env: environment used to evaluate the agent
    :param video_length: length of the video (in timesteps)
    :param is_deterministic: use deterministic or stochastic actions
    :param local_path: path of the local repository
    """
    # Use another temporary directory for the video outputs:
    # SB3 creates -step-0-to-... meta files as well as other
    # artifacts that we don't want in the repo.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Step 1: Create the VecVideoRecorder
        env = VecVideoRecorder(
            eval_env,
            tmpdirname,
            record_video_trigger=lambda x: x == 0,
            video_length=video_length,
            name_prefix="",
        )

        obs = env.reset()
        lstm_states = None
        episode_starts = np.ones((env.num_envs,), dtype=bool)

        try:
            for _ in range(video_length):
                action, lstm_states = model.predict(
                    obs,
                    state=lstm_states,
                    episode_start=episode_starts,
                    deterministic=is_deterministic,
                )
                obs, _, episode_starts, _ = env.step(action)

            # Save the video
            env.close()

            # Convert the video with the x264 codec
            inp = env.video_recorder.path
            out = os.path.join(local_path, "replay.mp4")
            os.system(f"ffmpeg -y -i {inp} -vcodec h264 {out}")
        except KeyboardInterrupt:
            pass
        except Exception as e:
            msg.fail(str(e))
            # Add a message for the video
            msg.fail(
                "We are unable to generate a replay of your agent, "
                "but the package_to_hub process continues"
            )
            msg.fail(
                "Please open an issue at "
                "https://github.com/huggingface/huggingface_sb3/issues"
            )


def generate_metadata(
    model_name: str, env_id: str, mean_reward: float, std_reward: float
) -> Dict[str, Any]:
    """
    Define the tags for the model card
    :param model_name: name of the model
    :param env_id: name of the environment
    :param mean_reward: mean reward of the agent
    :param std_reward: standard deviation of the mean reward of the agent
    """
    metadata = {}
    metadata["library_name"] = "stable-baselines3"
    metadata["tags"] = [
        env_id,
        "deep-reinforcement-learning",
        "reinforcement-learning",
        "stable-baselines3",
    ]

    # Add metrics
    eval_results = metadata_eval_result(
        model_pretty_name=model_name,
        task_pretty_name="reinforcement-learning",
        task_id="reinforcement-learning",
        metrics_pretty_name="mean_reward",
        metrics_id="mean_reward",
        metrics_value=f"{mean_reward:.2f} +/- {std_reward:.2f}",
        dataset_pretty_name=env_id,
        dataset_id=env_id,
    )

    # Merge both dictionaries
    metadata = {**metadata, **eval_results}

    return metadata
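
# For example (a sketch with illustrative values):
#
#     generate_metadata("ppo-CartPole-v1", "CartPole-v1", 200.0, 50.0)
#
# returns a dict with library_name, the RL tags above, and the model-index
# entries produced by metadata_eval_result, reporting "200.00 +/- 50.00"
# as the mean_reward metric on the CartPole-v1 dataset.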


def _generate_model_card(
    model_name: str, env_id: str, mean_reward: float, std_reward: float
) -> Tuple[str, Dict[str, Any]]:
    """
    Generate the model card for the Hub
    :param model_name: name of the model
    :param env_id: name of the environment
    :param mean_reward: mean reward of the agent
    :param std_reward: standard deviation of the mean reward of the agent
    """
    # Step 1: Select the tags
    metadata = generate_metadata(model_name, env_id, mean_reward, std_reward)

    # Step 2: Generate the model card
    model_card = f"""
# **{model_name}** Agent playing **{env_id}**
This is a trained model of a **{model_name}** agent playing **{env_id}**
using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
"""

    model_card += """
## Usage (with Stable-baselines3)
TODO: Add your code


```python
from stable_baselines3 import ...
from huggingface_sb3 import load_from_hub

...
```
"""

    return model_card, metadata


def _save_model_card(
    local_path: Path, generated_model_card: str, metadata: Dict[str, Any]
) -> None:
    """Saves a model card for the repository.
    :param local_path: repository directory
    :param generated_model_card: model card generated by _generate_model_card()
    :param metadata: metadata
    """
    readme_path = local_path / "README.md"
    # Keep an existing README, otherwise use the generated model card
    if readme_path.exists():
        with readme_path.open("r", encoding="utf-8") as f:
            readme = f.read()
    else:
        readme = generated_model_card

    with readme_path.open("w", encoding="utf-8") as f:
        f.write(readme)

    # Save our metrics to the README metadata
    metadata_save(readme_path, metadata)


def _add_logdir(local_path: Path, logdir: Path) -> None:
    """Adds a logdir to the repository.
    :param local_path: repository directory
    :param logdir: logdir directory
    """
    if logdir.exists() and logdir.is_dir():
        # Add the logdir to the repository under a new dir called logs
        repo_logdir = local_path / "logs"

        # Delete current logs if they exist
        if repo_logdir.exists():
            shutil.rmtree(repo_logdir)

        # Copy logdir into repo logdir
        shutil.copytree(logdir, repo_logdir)


def package_to_hub(
    model: BaseAlgorithm,
    model_name: str,
    model_architecture: str,
    env_id: str,
    eval_env: Union[VecEnv, gym.Env],
    repo_id: str,
    commit_message: str,
    is_deterministic: bool = True,
    n_eval_episodes: int = 10,
    token: Optional[str] = None,
    video_length: int = 1000,
    logs: Optional[str] = None,
):
    """
    Evaluate a model, generate a replay video, and upload everything to the Hugging Face Hub.
    This method runs the complete pipeline:
    - It evaluates the model
    - It generates the model card
    - It generates a replay video of the agent
    - It pushes everything to the Hub

    :param model: trained model
    :param model_name: name of the model zip file
    :param model_architecture: name of the architecture of your model
        (DQN, PPO, A2C, SAC...)
    :param env_id: name of the environment
    :param eval_env: environment used to evaluate the agent
    :param repo_id: id of the model repository from the Hugging Face Hub
    :param commit_message: commit message
    :param is_deterministic: use deterministic or stochastic actions (by default: True)
    :param n_eval_episodes: number of evaluation episodes (by default: 10)
    :param token: authentication token (see https://huggingface.co/settings/token)
        Caution: your token must remain secret (see https://huggingface.co/docs/hub/security-tokens)
    :param video_length: length of the video (in timesteps)
    :param logs: local directory of TensorBoard logs you'd like to upload
    """

    # Autowrap, so we only have a VecEnv afterward
    if not isinstance(eval_env, VecEnv):
        eval_env = DummyVecEnv([lambda: eval_env])

    msg.info(
        "This function will save, evaluate, generate a video of your agent, "
        "create a model card and push everything to the hub. "
        "It might take up to 1min. \n "
        "This is a work in progress: if you encounter a bug, please open an issue."
    )

    HfApi().create_repo(
        repo_id=repo_id,
        token=token,
        private=False,
        exist_ok=True,
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdirname = Path(tmpdirname)

        # Step 1: Save the model
        model.save(tmpdirname / model_name)

        # Retrieve the VecNormalize wrapper if it exists:
        # we need to save its statistics
        maybe_vec_normalize = unwrap_vec_normalize(eval_env)

        # Save the normalization
        if maybe_vec_normalize is not None:
            maybe_vec_normalize.save(tmpdirname / "vec_normalize.pkl")
            # Do not update the stats at test time
            maybe_vec_normalize.training = False
            # Reward normalization is not needed at test time
            maybe_vec_normalize.norm_reward = False

        # We use the same environment for video generation and evaluation
        replay_env = eval_env

        # Deterministic by default (except for Atari)
        if is_deterministic:
            is_deterministic = not is_atari(env_id)

        # Step 2: Create a config file
        _generate_config(model_name, tmpdirname)

        # Step 3: Evaluate the agent
        mean_reward, std_reward = _evaluate_agent(
            model, eval_env, n_eval_episodes, is_deterministic, tmpdirname
        )

        # Step 4: Generate a video
        _generate_replay(model, replay_env, video_length, is_deterministic, tmpdirname)

        # Step 5: Generate the model card
        generated_model_card, metadata = _generate_model_card(
            model_architecture, env_id, mean_reward, std_reward
        )
        _save_model_card(tmpdirname, generated_model_card, metadata)

        # Step 6: Add logs if needed
        if logs:
            _add_logdir(tmpdirname, Path(logs))

        msg.info(f"Pushing repo {repo_id} to the Hugging Face Hub")

        repo_url = upload_folder(
            repo_id=repo_id,
            folder_path=tmpdirname,
            path_in_repo="",
            commit_message=commit_message,
            token=token,
        )

        msg.info(
            f"Your model is pushed to the Hub. You can view your model here: {repo_url}"
        )
    return repo_url
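
# Example usage (a minimal sketch; the PPO model, CartPole-v1 env and
# "user/ppo-CartPole-v1" repo id are illustrative assumptions):
#
#     import gymnasium as gym
#     from stable_baselines3 import PPO
#
#     model = PPO("MlpPolicy", "CartPole-v1").learn(10_000)
#     package_to_hub(
#         model=model,
#         model_name="ppo-CartPole-v1",
#         model_architecture="PPO",
#         env_id="CartPole-v1",
#         eval_env=gym.make("CartPole-v1", render_mode="rgb_array"),
#         repo_id="user/ppo-CartPole-v1",
#         commit_message="Initial commit",
#     )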


def _copy_file(filepath: Path, dst_directory: Path) -> None:
    """
    Copy the file to the correct directory
    :param filepath: path of the file
    :param dst_directory: destination directory
    """
    dst = dst_directory / filepath.name
    # Copy from the full source path, not just the file name
    shutil.copy(str(filepath), str(dst))


def push_to_hub(
    repo_id: str,
    filename: str,
    commit_message: str,
    token: Optional[str] = None,
):
    """
    Upload a model to the Hugging Face Hub.
    :param repo_id: id of the model repository from the Hugging Face Hub
    :param filename: name of the model zip or mp4 file from the repository
    :param commit_message: commit message
    :param token: authentication token (see https://huggingface.co/settings/token)
        Caution: your token must remain secret (see https://huggingface.co/docs/hub/security-tokens)
    """

    HfApi().create_repo(
        repo_id=repo_id,
        token=token,
        private=False,
        exist_ok=True,
    )

    # Add the model
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdirname = Path(tmpdirname)
        filename_path = os.path.abspath(filename)
        _copy_file(Path(filename_path), tmpdirname)
        _save_model_card(tmpdirname, "", {})

        msg.info(f"Pushing repo {repo_id} to the Hugging Face Hub")
        repo_url = upload_folder(
            repo_id=repo_id,
            folder_path=tmpdirname,
            path_in_repo="",
            commit_message=commit_message,
            token=token,
        )

    msg.good(
        f"Your model has been uploaded to the Hub, you can find it here: {repo_url}"
    )
    return repo_url
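
# Example usage (a minimal sketch; the repo id and file name are illustrative
# assumptions):
#
#     push_to_hub(
#         repo_id="user/ppo-CartPole-v1",
#         filename="ppo-CartPole-v1.zip",
#         commit_message="Upload trained model",
#     )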