I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,57 @@
import re
import time
from pathlib import Path
from urllib.error import HTTPError
from urllib.request import urlopen
__all__ = ["DOWNLOAD_DIR", "retrieve_file", "output_file", "urls_from_file"]
NAME_REMOVE = ("http://", "https://", "github.com/", "/raw/")
DOWNLOAD_DIR = Path(__file__).parent
# ----------------------------------------------------------------------
# Please update ./preload.py accordingly when modifying this file
# ----------------------------------------------------------------------
def output_file(url: str, download_dir: Path = DOWNLOAD_DIR):
file_name = url.strip()
for part in NAME_REMOVE:
file_name = file_name.replace(part, '').strip().strip('/:').strip()
return Path(download_dir, re.sub(r"[^\-_\.\w\d]+", "_", file_name))
def retrieve_file(url: str, download_dir: Path = DOWNLOAD_DIR, wait: float = 5):
path = output_file(url, download_dir)
if path.exists():
print(f"Skipping {url} (already exists: {path})")
else:
download_dir.mkdir(exist_ok=True, parents=True)
print(f"Downloading {url} to {path}")
try:
download(url, path)
except HTTPError:
time.sleep(wait) # wait a few seconds and try again.
download(url, path)
return path
def urls_from_file(list_file: Path):
"""``list_file`` should be a text file where each line corresponds to a URL to
download.
"""
print(f"file: {list_file}")
content = list_file.read_text(encoding="utf-8")
return [url for url in content.splitlines() if not url.startswith("#")]
def download(url: str, dest: Path):
with urlopen(url) as f:
data = f.read()
with open(dest, "wb") as f:
f.write(data)
assert Path(dest).exists()

View File

@ -0,0 +1,18 @@
"""This file can be used to preload files needed for testing.
For example you can use::
cd setuptools/tests/config
python -m downloads.preload setupcfg_examples.txt
to make sure the `setup.cfg` examples are downloaded before starting the tests.
"""
import sys
from pathlib import Path
from . import retrieve_file, urls_from_file
if __name__ == "__main__":
urls = urls_from_file(Path(sys.argv[1]))
list(map(retrieve_file, urls))