import datetime
import json
import os
import shutil
import tempfile
import zipfile
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, Union

import gymnasium as gym
import numpy as np
import stable_baselines3
from huggingface_hub import HfApi, upload_folder
from huggingface_hub.repocard import metadata_eval_result, metadata_save
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import (
    DummyVecEnv,
    VecEnv,
    VecVideoRecorder,
    unwrap_vec_normalize,
)
from wasabi import Printer

msg = Printer()


def _generate_config(model: str, local_path: Path) -> None:
    """
    Generate a config.json file containing information
    about the agent and the environment.
    :param model: name of the model zip file
    :param local_path: path of the local directory
    """
    unzipped_model_folder = model

    # Check if the user forgot to include the file extension
    if not model.endswith(".zip"):
        model += ".zip"

    # Step 1: Unzip the model
    with zipfile.ZipFile(local_path / model, "r") as zip_ref:
        zip_ref.extractall(local_path / unzipped_model_folder)

    # Step 2: Get data (JSON containing infos) and read it
    with open(local_path / unzipped_model_folder / "data") as json_file:
        data = json.load(json_file)
        # Add system_info elements to our JSON
        data["system_info"] = stable_baselines3.get_system_info(print_info=False)[0]

    # Step 3: Write our config.json file
    with open(local_path / "config.json", "w") as outfile:
        json.dump(data, outfile)


def _evaluate_agent(
    model: BaseAlgorithm,
    eval_env: VecEnv,
    n_eval_episodes: int,
    is_deterministic: bool,
    local_path: Path,
) -> Tuple[float, float]:
    """
    Evaluate the agent using the SB3 evaluate_policy method
    and create a results.json

    :param model: trained model object
    :param eval_env: environment used to evaluate the agent
    :param n_eval_episodes: number of evaluation episodes
    :param is_deterministic: use deterministic or stochastic actions
    :param local_path: path of the local repository
    :return: mean reward and standard deviation of the reward
    """
    # Step 1: Evaluate the agent
    mean_reward, std_reward = evaluate_policy(
        model, eval_env, n_eval_episodes=n_eval_episodes, deterministic=is_deterministic
    )

    # Step 2: Create the JSON evaluation payload
    # First get the datetime
    eval_datetime = datetime.datetime.now()
    eval_form_datetime = eval_datetime.isoformat()

    evaluate_data = {
        "mean_reward": mean_reward,
        "std_reward": std_reward,
        "is_deterministic": is_deterministic,
        "n_eval_episodes": n_eval_episodes,
        "eval_datetime": eval_form_datetime,
    }

    # Step 3: Write the JSON file
    with open(local_path / "results.json", "w") as outfile:
        json.dump(evaluate_data, outfile)

    return mean_reward, std_reward
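
# For reference, the results.json written by _evaluate_agent has this shape
# (a sketch with illustrative values, not real results):
#
#     {"mean_reward": 200.0, "std_reward": 0.0, "is_deterministic": true,
#      "n_eval_episodes": 10, "eval_datetime": "2024-01-01T12:00:00"}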


def entry_point(env_id: str) -> str:
    """
    Return the entry point of a registered environment,
    falling back to the legacy gym registry when the id
    is not registered with gymnasium.
    :param env_id: name of the environment
    """
    try:
        return str(gym.envs.registry[env_id].entry_point)
    except KeyError:
        # Fall back to the registry of the legacy gym package
        import gym as gym26

        return str(gym26.envs.registry[env_id].entry_point)


def is_atari(env_id: str) -> bool:
    """
    Check if the environment is an Atari one
    (taken from RL-Baselines3-Zoo)
    :param env_id: name of the environment
    """
    return "AtariEnv" in entry_point(env_id)
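
# For example, assuming ale-py's Atari environments are registered
# (illustrative ids, not defined in this module):
#
#     is_atari("BreakoutNoFrameskip-v4")  # True: "AtariEnv" is in its entry point
#     is_atari("CartPole-v1")             # False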


def _generate_replay(
    model: BaseAlgorithm,
    eval_env: VecEnv,
    video_length: int,
    is_deterministic: bool,
    local_path: Path,
) -> None:
    """
    Generate a replay video of the agent
    :param model: trained model
    :param eval_env: environment used to evaluate the agent
    :param video_length: length of the video (in timesteps)
    :param is_deterministic: use deterministic or stochastic actions
    :param local_path: path of the local repository
    """
    # Use another temporary directory for the video outputs:
    # SB3 creates -step-0-to-... meta files as well as other
    # artifacts that we don't want in the repo.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Step 1: Create the VecVideoRecorder
        env = VecVideoRecorder(
            eval_env,
            tmpdirname,
            record_video_trigger=lambda x: x == 0,
            video_length=video_length,
            name_prefix="",
        )

        obs = env.reset()
        lstm_states = None
        episode_starts = np.ones((env.num_envs,), dtype=bool)

        try:
            for _ in range(video_length):
                action, lstm_states = model.predict(
                    obs,
                    state=lstm_states,
                    episode_start=episode_starts,
                    deterministic=is_deterministic,
                )
                obs, _, episode_starts, _ = env.step(action)

            # Save the video
            env.close()

            # Convert the video with the x264 codec
            inp = env.video_recorder.path
            out = os.path.join(local_path, "replay.mp4")
            os.system(f"ffmpeg -y -i {inp} -vcodec h264 {out}")
        except KeyboardInterrupt:
            pass
        except Exception as e:
            msg.fail(str(e))
            # Add a message for the video
            msg.fail(
                "We are unable to generate a replay of your agent, "
                "but the package_to_hub process continues"
            )
            msg.fail(
                "Please open an issue at "
                "https://github.com/huggingface/huggingface_sb3/issues"
            )


def generate_metadata(
    model_name: str, env_id: str, mean_reward: float, std_reward: float
) -> Dict[str, Any]:
    """
    Define the tags for the model card
    :param model_name: name of the model
    :param env_id: name of the environment
    :param mean_reward: mean reward of the agent
    :param std_reward: standard deviation of the mean reward of the agent
    """
    metadata = {}
    metadata["library_name"] = "stable-baselines3"
    metadata["tags"] = [
        env_id,
        "deep-reinforcement-learning",
        "reinforcement-learning",
        "stable-baselines3",
    ]

    # Add metrics
    eval_results = metadata_eval_result(
        model_pretty_name=model_name,
        task_pretty_name="reinforcement-learning",
        task_id="reinforcement-learning",
        metrics_pretty_name="mean_reward",
        metrics_id="mean_reward",
        metrics_value=f"{mean_reward:.2f} +/- {std_reward:.2f}",
        dataset_pretty_name=env_id,
        dataset_id=env_id,
    )

    # Merge both dictionaries
    metadata = {**metadata, **eval_results}

    return metadata
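
# For example (a sketch with illustrative values):
#
#     generate_metadata("ppo-CartPole-v1", "CartPole-v1", 200.0, 50.0)
#
# returns a dict with library_name, the RL tags above, and the model-index
# entries produced by metadata_eval_result, reporting "200.00 +/- 50.00"
# as the mean_reward metric on the CartPole-v1 dataset.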


def _generate_model_card(
    model_name: str, env_id: str, mean_reward: float, std_reward: float
) -> Tuple[str, Dict[str, Any]]:
    """
    Generate the model card for the Hub
    :param model_name: name of the model
    :param env_id: name of the environment
    :param mean_reward: mean reward of the agent
    :param std_reward: standard deviation of the mean reward of the agent
    """
    # Step 1: Select the tags
    metadata = generate_metadata(model_name, env_id, mean_reward, std_reward)

    # Step 2: Generate the model card
    model_card = f"""
# **{model_name}** Agent playing **{env_id}**
This is a trained model of a **{model_name}** agent playing **{env_id}**
using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
"""

    model_card += """
## Usage (with Stable-baselines3)
TODO: Add your code


```python
from stable_baselines3 import ...
from huggingface_sb3 import load_from_hub

...
```
"""

    return model_card, metadata


def _save_model_card(
    local_path: Path, generated_model_card: str, metadata: Dict[str, Any]
) -> None:
    """Saves a model card for the repository.
    :param local_path: repository directory
    :param generated_model_card: model card generated by _generate_model_card()
    :param metadata: metadata
    """
    readme_path = local_path / "README.md"
    # Keep an existing README, otherwise use the generated model card
    if readme_path.exists():
        with readme_path.open("r", encoding="utf-8") as f:
            readme = f.read()
    else:
        readme = generated_model_card

    with readme_path.open("w", encoding="utf-8") as f:
        f.write(readme)

    # Save our metrics to the README metadata
    metadata_save(readme_path, metadata)


def _add_logdir(local_path: Path, logdir: Path) -> None:
    """Adds a logdir to the repository.
    :param local_path: repository directory
    :param logdir: logdir directory
    """
    if logdir.exists() and logdir.is_dir():
        # Add the logdir to the repository under a new dir called logs
        repo_logdir = local_path / "logs"

        # Delete current logs if they exist
        if repo_logdir.exists():
            shutil.rmtree(repo_logdir)

        # Copy logdir into repo logdir
        shutil.copytree(logdir, repo_logdir)


def package_to_hub(
    model: BaseAlgorithm,
    model_name: str,
    model_architecture: str,
    env_id: str,
    eval_env: Union[VecEnv, gym.Env],
    repo_id: str,
    commit_message: str,
    is_deterministic: bool = True,
    n_eval_episodes: int = 10,
    token: Optional[str] = None,
    video_length: int = 1000,
    logs: Optional[str] = None,
):
    """
    Evaluate a model, generate a replay video, and upload everything to the Hugging Face Hub.
    This method runs the complete pipeline:
    - It evaluates the model
    - It generates the model card
    - It generates a replay video of the agent
    - It pushes everything to the Hub

    :param model: trained model
    :param model_name: name of the model zip file
    :param model_architecture: name of the architecture of your model
        (DQN, PPO, A2C, SAC...)
    :param env_id: name of the environment
    :param eval_env: environment used to evaluate the agent
    :param repo_id: id of the model repository from the Hugging Face Hub
    :param commit_message: commit message
    :param is_deterministic: use deterministic or stochastic actions (by default: True)
    :param n_eval_episodes: number of evaluation episodes (by default: 10)
    :param token: authentication token (see https://huggingface.co/settings/token)
        Caution: your token must remain secret (see https://huggingface.co/docs/hub/security-tokens)
    :param video_length: length of the video (in timesteps)
    :param logs: local directory of TensorBoard logs you'd like to upload
    """

    # Autowrap, so we only have a VecEnv afterward
    if not isinstance(eval_env, VecEnv):
        eval_env = DummyVecEnv([lambda: eval_env])

    msg.info(
        "This function will save, evaluate, generate a video of your agent, "
        "create a model card and push everything to the hub. "
        "It might take up to 1min. \n "
        "This is a work in progress: if you encounter a bug, please open an issue."
    )

    HfApi().create_repo(
        repo_id=repo_id,
        token=token,
        private=False,
        exist_ok=True,
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdirname = Path(tmpdirname)

        # Step 1: Save the model
        model.save(tmpdirname / model_name)

        # Retrieve the VecNormalize wrapper if it exists:
        # we need to save its statistics
        maybe_vec_normalize = unwrap_vec_normalize(eval_env)

        # Save the normalization
        if maybe_vec_normalize is not None:
            maybe_vec_normalize.save(tmpdirname / "vec_normalize.pkl")
            # Do not update the stats at test time
            maybe_vec_normalize.training = False
            # Reward normalization is not needed at test time
            maybe_vec_normalize.norm_reward = False

        # We use the same environment for video generation and evaluation
        replay_env = eval_env

        # Deterministic by default (except for Atari)
        if is_deterministic:
            is_deterministic = not is_atari(env_id)

        # Step 2: Create a config file
        _generate_config(model_name, tmpdirname)

        # Step 3: Evaluate the agent
        mean_reward, std_reward = _evaluate_agent(
            model, eval_env, n_eval_episodes, is_deterministic, tmpdirname
        )

        # Step 4: Generate a video
        _generate_replay(model, replay_env, video_length, is_deterministic, tmpdirname)

        # Step 5: Generate the model card
        generated_model_card, metadata = _generate_model_card(
            model_architecture, env_id, mean_reward, std_reward
        )
        _save_model_card(tmpdirname, generated_model_card, metadata)

        # Step 6: Add logs if needed
        if logs:
            _add_logdir(tmpdirname, Path(logs))

        msg.info(f"Pushing repo {repo_id} to the Hugging Face Hub")

        repo_url = upload_folder(
            repo_id=repo_id,
            folder_path=tmpdirname,
            path_in_repo="",
            commit_message=commit_message,
            token=token,
        )

        msg.info(
            f"Your model is pushed to the Hub. You can view your model here: {repo_url}"
        )
    return repo_url
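
# Example usage (a minimal sketch; the PPO model, CartPole-v1 env and
# "user/ppo-CartPole-v1" repo id are illustrative assumptions):
#
#     import gymnasium as gym
#     from stable_baselines3 import PPO
#
#     model = PPO("MlpPolicy", "CartPole-v1").learn(10_000)
#     package_to_hub(
#         model=model,
#         model_name="ppo-CartPole-v1",
#         model_architecture="PPO",
#         env_id="CartPole-v1",
#         eval_env=gym.make("CartPole-v1", render_mode="rgb_array"),
#         repo_id="user/ppo-CartPole-v1",
#         commit_message="Initial commit",
#     )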


def _copy_file(filepath: Path, dst_directory: Path) -> None:
    """
    Copy the file to the correct directory
    :param filepath: path of the file
    :param dst_directory: destination directory
    """
    dst = dst_directory / filepath.name
    # Copy from the full source path, not just the file name
    shutil.copy(str(filepath), str(dst))


def push_to_hub(
    repo_id: str,
    filename: str,
    commit_message: str,
    token: Optional[str] = None,
):
    """
    Upload a model to the Hugging Face Hub.
    :param repo_id: id of the model repository from the Hugging Face Hub
    :param filename: name of the model zip or mp4 file from the repository
    :param commit_message: commit message
    :param token: authentication token (see https://huggingface.co/settings/token)
        Caution: your token must remain secret (see https://huggingface.co/docs/hub/security-tokens)
    """

    HfApi().create_repo(
        repo_id=repo_id,
        token=token,
        private=False,
        exist_ok=True,
    )

    # Add the model
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdirname = Path(tmpdirname)
        filename_path = os.path.abspath(filename)
        _copy_file(Path(filename_path), tmpdirname)
        _save_model_card(tmpdirname, "", {})

        msg.info(f"Pushing repo {repo_id} to the Hugging Face Hub")
        repo_url = upload_folder(
            repo_id=repo_id,
            folder_path=tmpdirname,
            path_in_repo="",
            commit_message=commit_message,
            token=token,
        )

    msg.good(
        f"Your model has been uploaded to the Hub, you can find it here: {repo_url}"
    )
    return repo_url
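
# Example usage (a minimal sketch; the repo id and file name are illustrative
# assumptions):
#
#     push_to_hub(
#         repo_id="user/ppo-CartPole-v1",
#         filename="ppo-CartPole-v1.zip",
#         commit_message="Upload trained model",
#     )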