I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,287 @@
import os
from hashlib import md5
import pytest
from fsspec.implementations.local import LocalFileSystem
from fsspec.tests.abstract.copy import AbstractCopyTests # noqa: F401
from fsspec.tests.abstract.get import AbstractGetTests # noqa: F401
from fsspec.tests.abstract.put import AbstractPutTests # noqa: F401
class BaseAbstractFixtures:
    """
    Fixtures shared by every filesystem-specific run of the abstract tests.

    Derived classes use these fixtures as they are and never need to override
    them.  Each one is built only from other fixtures requested by name
    (``fs``, ``fs_join``, ``fs_path``, ``local_fs``, ``local_join`` and
    ``local_path``).
    """

    @pytest.fixture
    def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
        """
        Provide the general cp/get/put scenario on the remote filesystem.

        Yields the scenario's ``source`` directory and removes it recursively
        at the end of each test in which the fixture is used.
        """
        scenario_root = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
        yield scenario_root
        fs.rm(scenario_root, recursive=True)

    @pytest.fixture
    def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
        """
        Provide the glob edge-case scenario on the remote filesystem.

        Yields the scenario's ``source`` directory and removes it recursively
        at the end of each test in which the fixture is used.
        """
        scenario_root = self._glob_edge_cases_files(fs, fs_join, fs_path)
        yield scenario_root
        fs.rm(scenario_root, recursive=True)

    @pytest.fixture
    def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
        """
        Provide, on the remote filesystem, a directory and a file whose names
        share the same prefix.

        Yields the scenario's ``source`` directory and removes it recursively
        at the end of each test in which the fixture is used.
        """
        scenario_root = self._dir_and_file_with_same_name_prefix(
            fs, fs_join, fs_path
        )
        yield scenario_root
        fs.rm(scenario_root, recursive=True)

    @pytest.fixture
    def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
        """
        Provide ten hash-named files on the remote filesystem, used to check
        file ordering when source and destination are both lists.

        Yields the scenario's ``source`` directory and removes it recursively
        at the end of each test in which the fixture is used.
        """
        scenario_root = self._10_files_with_hashed_names(fs, fs_join, fs_path)
        yield scenario_root
        fs.rm(scenario_root, recursive=True)

    @pytest.fixture
    def fs_target(self, fs, fs_join, fs_path):
        """
        Yield the name of a remote directory, not yet created, to copy into.

        At the end of each test in which the fixture is used the directory is
        removed recursively, but only if the test actually created it.
        """
        target_dir = fs_join(fs_path, "target")
        yield target_dir
        if not fs.exists(target_dir):
            return
        fs.rm(target_dir, recursive=True)

    @pytest.fixture
    def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
        """
        Provide the general cp/get/put scenario on the local filesystem.

        Yields the scenario's ``source`` directory and removes it recursively
        at the end of each test in which the fixture is used.
        """
        scenario_root = self._bulk_operations_scenario_0(
            local_fs, local_join, local_path
        )
        yield scenario_root
        local_fs.rm(scenario_root, recursive=True)

    @pytest.fixture
    def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
        """
        Provide the glob edge-case scenario on the local filesystem.

        Yields the scenario's ``source`` directory and removes it recursively
        at the end of each test in which the fixture is used.
        """
        scenario_root = self._glob_edge_cases_files(local_fs, local_join, local_path)
        yield scenario_root
        local_fs.rm(scenario_root, recursive=True)

    @pytest.fixture
    def local_dir_and_file_with_same_name_prefix(
        self, local_fs, local_join, local_path
    ):
        """
        Provide, on the local filesystem, a directory and a file whose names
        share the same prefix.

        Yields the scenario's ``source`` directory and removes it recursively
        at the end of each test in which the fixture is used.
        """
        scenario_root = self._dir_and_file_with_same_name_prefix(
            local_fs, local_join, local_path
        )
        yield scenario_root
        local_fs.rm(scenario_root, recursive=True)

    @pytest.fixture
    def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
        """
        Provide ten hash-named files on the local filesystem, used to check
        file ordering when source and destination are both lists.

        Yields the scenario's ``source`` directory and removes it recursively
        at the end of each test in which the fixture is used.
        """
        scenario_root = self._10_files_with_hashed_names(
            local_fs, local_join, local_path
        )
        yield scenario_root
        local_fs.rm(scenario_root, recursive=True)

    @pytest.fixture
    def local_target(self, local_fs, local_join, local_path):
        """
        Yield the name of a local directory, not yet created, to copy into.

        At the end of each test in which the fixture is used the directory is
        removed recursively, but only if the test actually created it.
        """
        target_dir = local_join(local_path, "target")
        yield target_dir
        if not local_fs.exists(target_dir):
            return
        local_fs.rm(target_dir, recursive=True)

    def _glob_edge_cases_files(self, some_fs, some_join, some_path):
        """
        Create the scenario used by the glob edge-case cp/get/put tests.

        The following structure is created below ``some_path`` and the path
        of ``source`` is returned:

            📁 source
            ├── 📄 file1
            ├── 📄 file2
            ├── 📁 subdir0
            │   ├── 📄 subfile1
            │   ├── 📄 subfile2
            │   └── 📁 nesteddir
            │       └── 📄 nestedfile
            └── 📁 subdir1
                ├── 📄 subfile1
                ├── 📄 subfile2
                └── 📁 nesteddir
                    └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        for top_level_name in ("file1", "file2"):
            some_fs.touch(some_join(source, top_level_name))
        for idx in (0, 1):
            subdir = some_join(source, f"subdir{idx}")
            nesteddir = some_join(subdir, "nesteddir")
            some_fs.makedirs(nesteddir)
            for leaf_name in ("subfile1", "subfile2"):
                some_fs.touch(some_join(subdir, leaf_name))
            some_fs.touch(some_join(nesteddir, "nestedfile"))
        return source

    def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
        """
        Create the scenario used by many of the cp/get/put tests.

        The following structure is created below ``some_path`` and the path
        of ``source`` is returned:

            📁 source
            ├── 📄 file1
            ├── 📄 file2
            └── 📁 subdir
                ├── 📄 subfile1
                ├── 📄 subfile2
                └── 📁 nesteddir
                    └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        nesteddir = some_join(subdir, "nesteddir")
        # The deepest directory is created first, before any file is touched.
        some_fs.makedirs(nesteddir)
        files_to_create = (
            (source, "file1"),
            (source, "file2"),
            (subdir, "subfile1"),
            (subdir, "subfile2"),
            (nesteddir, "nestedfile"),
        )
        for parent, name in files_to_create:
            some_fs.touch(some_join(parent, name))
        return source

    def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
        """
        Create a directory and a file whose names share the same prefix.

        The following structure is created below ``some_path`` and the path
        of ``source`` is returned:

            📁 source
            ├── 📄 subdir.txt
            └── 📁 subdir
                └── 📄 subfile.txt
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        prefix_twin = some_join(source, "subdir.txt")
        nested_file = some_join(subdir, "subfile.txt")
        some_fs.makedirs(subdir)
        for new_file in (prefix_twin, nested_file):
            some_fs.touch(new_file)
        return source

    def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
        """
        Create ten files named after the MD5 hex digest of the digits 0-9.

        Each file contains the digit it was named after, encoded as UTF-8.
        The following structure is created below ``some_path`` and the path
        of ``source`` is returned:

            📁 source
            └── 📄 {hashed([0-9])}.txt
        """
        source = some_join(some_path, "source")
        for i in range(10):
            digest = md5(str(i).encode("utf-8")).hexdigest()
            destination = some_join(source, f"{digest}.txt")
            payload = f"{i}".encode("utf-8")
            some_fs.pipe(path=destination, value=payload)
        return source
class AbstractFixtures(BaseAbstractFixtures):
    """
    Overridable fixtures for running the abstract tests on one filesystem.

    A filesystem-specific subclass must override some of these fixtures
    (``fs`` and ``fs_path`` raise ``NotImplementedError`` here) and may
    override others, such as ``fs_join``, when the default below is not
    appropriate for that filesystem.
    """

    @pytest.fixture
    def fs(self):
        """Filesystem under test; has no default and must be overridden."""
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture
    def fs_join(self):
        """
        Return the callable used to join path components on the filesystem
        under test.

        The default is ``os.path.join``, which joins in a platform-dependent
        way; implementations that always use a forward slash override this.
        """
        join_function = os.path.join
        return join_function

    @pytest.fixture
    def fs_path(self):
        """Base path on the filesystem under test; must be overridden."""
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture(scope="class")
    def local_fs(self):
        """Return a class-scoped ``LocalFileSystem`` created with ``auto_mkdir=True``."""
        # Maybe need an option for auto_mkdir=False? This is only relevant
        # for certain implementations.
        local_filesystem = LocalFileSystem(auto_mkdir=True)
        return local_filesystem

    @pytest.fixture
    def local_join(self):
        """
        Return the callable used to join path components on the local
        filesystem (``os.path.join``).
        """
        join_function = os.path.join
        return join_function

    @pytest.fixture
    def local_path(self, tmpdir):
        """Return pytest's ``tmpdir`` as the base path for local scenarios."""
        return tmpdir

    @pytest.fixture
    def supports_empty_directories(self):
        """
        Return whether this implementation supports empty directories.

        The default is ``True``.
        """
        return True

    @pytest.fixture
    def fs_sanitize_path(self):
        """Return a callable applied to expected paths; by default the identity."""

        def identity(path):
            return path

        return identity

View File

@ -0,0 +1,175 @@
# Parametrization table for the glob edge-case tests.
# "argnames" names the four columns of every row in "argvalues":
#   path      - glob pattern, relative to the scenario's source directory
#   recursive - value passed as ``recursive``
#   maxdepth  - value passed as ``maxdepth`` (None means no limit is given)
#   expected  - relative paths expected after the operation
GLOB_EDGE_CASES_TESTS = {
    "argnames": ("path", "recursive", "maxdepth", "expected"),
    "argvalues": [
        # Single-character wildcard and character class on top-level files
        ("fil?1", False, None, ["file1"]),
        ("fil?1", True, None, ["file1"]),
        ("file[1-2]", False, None, ["file1", "file2"]),
        ("file[1-2]", True, None, ["file1", "file2"]),
        # "*"
        ("*", False, None, ["file1", "file2"]),
        (
            "*", True, None,
            [
                "file1", "file2",
                "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*", True, 1, ["file1", "file2"]),
        (
            "*", True, 2,
            [
                "file1", "file2",
                "subdir0/subfile1", "subdir0/subfile2",
                "subdir1/subfile1", "subdir1/subfile2",
            ],
        ),
        # "*1"
        ("*1", False, None, ["file1"]),
        (
            "*1", True, None,
            [
                "file1",
                "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
        # "**"
        (
            "**", False, None,
            [
                "file1", "file2",
                "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**", True, None,
            [
                "file1", "file2",
                "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**", True, 1, ["file1", "file2"]),
        (
            "**", True, 2,
            [
                "file1", "file2",
                "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**", False, 2,
            [
                "file1", "file2",
                "subdir0/subfile1", "subdir0/subfile2",
                "subdir1/subfile1", "subdir1/subfile2",
            ],
        ),
        # "**/*1"
        ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        (
            "**/*1", True, None,
            [
                "file1",
                "subdir0/subfile1",
                "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**/*1", True, 1, ["file1"]),
        (
            "**/*1", True, 2,
            ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
        ),
        ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        # "**/subdir0" and below
        ("**/subdir0", False, None, []),
        ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("**/subdir0/nested*", False, 2, []),
        ("**/subdir0/nested*", True, 2, ["nestedfile"]),
        # Character classes on directory names
        ("subdir[1-2]", False, None, []),
        ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
        ("subdir[0-1]", False, None, []),
        (
            "subdir[0-1]", True, None,
            [
                "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*", False, None,
            [
                "subdir0/subfile1", "subdir0/subfile2",
                "subdir1/subfile1", "subdir1/subfile2",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*", True, None,
            [
                "subdir0/subfile1", "subdir0/subfile2",
                "subdir1/subfile1", "subdir1/subfile2",
            ],
        ),
    ],
}

View File

@ -0,0 +1,557 @@
from hashlib import md5
from itertools import product
import pytest
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
class AbstractCopyTests:
    """
    Abstract tests for ``fs.cp`` / ``fs.copy`` within a single filesystem.

    The filesystem and all scenario data reach each test through fixtures
    requested by name.  Comments such as "Copy scenario 1a" label the case
    that a test covers.
    """

    def test_copy_file_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        """Copy single files into an existing directory, without and with a trailing slash."""
        # Copy scenario 1a
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        copied_file2 = fs_join(target, "file2")
        copied_subfile1 = fs_join(target, "subfile1")

        # First round: file from the source directory itself ...
        fs.cp(fs_join(source, "file2"), target)
        assert fs.isfile(copied_file2)
        # ... and file from a sub directory
        fs.cp(fs_join(source, "subdir", "subfile1"), target)
        assert fs.isfile(copied_subfile1)

        # Clear the copies before the second round
        fs.rm([copied_file2, copied_subfile1])
        for removed in (copied_file2, copied_subfile1):
            assert not fs.exists(removed)

        # Second round: target spelled with a trailing slash
        fs.cp(fs_join(source, "file2"), target + "/")
        assert fs.isdir(target)
        assert fs.isfile(copied_file2)
        fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
        assert fs.isfile(copied_subfile1)

    def test_copy_file_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        """Copy a file to a not-yet-existing directory named with a trailing slash."""
        # Copy scenario 1b
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)

        origin = fs_join(source, "subdir", "subfile1")
        destination = fs_join(target, "newdir/")  # Note trailing slash
        fs.cp(origin, destination)

        assert fs.isdir(target)
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_copy_file_to_file_in_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        """Copy a file to a new file name inside an existing directory."""
        # Copy scenario 1c
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        origin = fs_join(source, "subdir", "subfile1")
        fs.cp(origin, fs_join(target, "newfile"))
        assert fs.isfile(fs_join(target, "newfile"))

    def test_copy_file_to_file_in_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        """Copy a file to a new file name inside a not-yet-existing directory."""
        # Copy scenario 1d
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)

        origin = fs_join(source, "subdir", "subfile1")
        destination = fs_join(target, "newdir", "newfile")
        fs.cp(origin, destination)

        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "newfile"))

    def test_copy_directory_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        """Copy a directory into an existing directory: plain, recursive, and depth-limited."""
        # Copy scenario 1e
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)
        dummy = fs_join(target, "dummy")
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(dummy)
        assert fs.isdir(target)

        # Listing of the target whenever it holds no copied content
        pristine = [] if supports_empty_directories else [dummy]

        flat_subfile1 = fs_join(target, "subfile1")
        flat_subfile2 = fs_join(target, "subfile2")
        flat_nesteddir = fs_join(target, "nesteddir")
        copied_subdir = fs_join(target, "subdir")

        # Source and target get a trailing slash together or not at all
        for source_slash, target_slash in ((False, False), (True, True)):
            s = fs_join(source, "subdir") + ("/" if source_slash else "")
            if target_slash:
                t = target + "/"
            else:
                t = target

            # Without recursive does nothing
            fs.cp(s, t)
            assert fs.ls(target, detail=False) == pristine

            # With recursive
            fs.cp(s, t, recursive=True)
            if not source_slash:
                # The directory itself lands inside the target
                assert fs.isdir(copied_subdir)
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
                fs.rm(copied_subdir, recursive=True)
            else:
                # Only the directory's contents land inside the target
                assert fs.isfile(flat_subfile1)
                assert fs.isfile(flat_subfile2)
                assert fs.isdir(flat_nesteddir)
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(copied_subdir)
                fs.rm(
                    [flat_subfile1, flat_subfile2, flat_nesteddir],
                    recursive=True,
                )
            assert fs.ls(target, detail=False) == pristine

            # Limit recursive by maxdepth
            fs.cp(s, t, recursive=True, maxdepth=1)
            if not source_slash:
                assert fs.isdir(copied_subdir)
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
                fs.rm(copied_subdir, recursive=True)
            else:
                assert fs.isfile(flat_subfile1)
                assert fs.isfile(flat_subfile2)
                assert not fs.exists(flat_nesteddir)
                assert not fs.exists(copied_subdir)
                fs.rm([flat_subfile1, flat_subfile2], recursive=True)
            assert fs.ls(target, detail=False) == pristine

    def test_copy_directory_to_new_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        """Copy a directory to a not-yet-existing directory: plain, recursive, and depth-limited."""
        # Copy scenario 1f
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)

        newdir = fs_join(target, "newdir")
        stray_subdir = fs_join(target, "subdir")

        # Source and target get a trailing slash together or not at all
        for source_slash, target_slash in ((False, False), (True, True)):
            s = fs_join(source, "subdir") + ("/" if source_slash else "")
            t = fs_join(target, "newdir") + ("/" if target_slash else "")

            # Without recursive does nothing
            fs.cp(s, t)
            if not supports_empty_directories:
                with pytest.raises(FileNotFoundError):
                    fs.ls(target)
            else:
                assert fs.ls(target) == []

            # With recursive
            fs.cp(s, t, recursive=True)
            assert fs.isdir(newdir)
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
            assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(stray_subdir)
            fs.rm(newdir, recursive=True)
            assert not fs.exists(newdir)

            # Limit recursive by maxdepth
            fs.cp(s, t, recursive=True, maxdepth=1)
            assert fs.isdir(newdir)
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(stray_subdir)
            fs.rm(newdir, recursive=True)
            assert not fs.exists(newdir)

    def test_copy_glob_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        """Copy glob matches into an existing directory: plain, recursive, and depth-limited."""
        # Copy scenario 1g
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)
        dummy = fs_join(target, "dummy")
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(dummy)
        assert fs.isdir(target)

        # Listing of the target whenever it holds no copied content
        pristine = [] if supports_empty_directories else [dummy]

        subfile1 = fs_join(target, "subfile1")
        subfile2 = fs_join(target, "subfile2")
        nesteddir = fs_join(target, "nesteddir")
        stray_subdir = fs_join(target, "subdir")

        for target_slash in (False, True):
            if target_slash:
                t = target + "/"
            else:
                t = target

            # Without recursive
            fs.cp(fs_join(source, "subdir", "*"), t)
            assert fs.isfile(subfile1)
            assert fs.isfile(subfile2)
            assert not fs.isdir(nesteddir)
            assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
            assert not fs.exists(stray_subdir)
            fs.rm([subfile1, subfile2], recursive=True)
            assert fs.ls(target, detail=False) == pristine

            # With recursive
            for pattern, recursive in (("*", True), ("**", False)):
                fs.cp(fs_join(source, "subdir", pattern), t, recursive=recursive)
                assert fs.isfile(subfile1)
                assert fs.isfile(subfile2)
                assert fs.isdir(nesteddir)
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(stray_subdir)
                fs.rm([subfile1, subfile2, nesteddir], recursive=True)
                assert fs.ls(target, detail=False) == pristine

                # Limit recursive by maxdepth
                fs.cp(
                    fs_join(source, "subdir", pattern),
                    t,
                    recursive=recursive,
                    maxdepth=1,
                )
                assert fs.isfile(subfile1)
                assert fs.isfile(subfile2)
                assert not fs.exists(nesteddir)
                assert not fs.exists(stray_subdir)
                fs.rm([subfile1, subfile2], recursive=True)
                assert fs.ls(target, detail=False) == pristine

    def test_copy_glob_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        """Copy glob matches to a not-yet-existing directory: plain, recursive, and depth-limited."""
        # Copy scenario 1h
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)

        newdir = fs_join(target, "newdir")
        stray_subdir = fs_join(target, "subdir")
        stray_nested_subdir = fs_join(target, "newdir", "subdir")

        for target_slash in (False, True):
            t = fs_join(target, "newdir") + ("/" if target_slash else "")

            # Without recursive
            fs.cp(fs_join(source, "subdir", "*"), t)
            assert fs.isdir(newdir)
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(stray_subdir)
            assert not fs.exists(stray_nested_subdir)
            fs.rm(newdir, recursive=True)
            assert not fs.exists(newdir)

            # With recursive
            for pattern, recursive in (("*", True), ("**", False)):
                fs.cp(fs_join(source, "subdir", pattern), t, recursive=recursive)
                assert fs.isdir(newdir)
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
                assert not fs.exists(stray_subdir)
                assert not fs.exists(stray_nested_subdir)
                fs.rm(newdir, recursive=True)
                assert not fs.exists(newdir)

                # Limit recursive by maxdepth
                fs.cp(
                    fs_join(source, "subdir", pattern),
                    t,
                    recursive=recursive,
                    maxdepth=1,
                )
                assert fs.isdir(newdir)
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
                assert not fs.exists(stray_subdir)
                assert not fs.exists(stray_nested_subdir)
                fs.rm(newdir, recursive=True)
                assert not fs.exists(newdir)

    @pytest.mark.parametrize(
        GLOB_EDGE_CASES_TESTS["argnames"],
        GLOB_EDGE_CASES_TESTS["argvalues"],
    )
    def test_copy_glob_edge_cases(
        self,
        path,
        recursive,
        maxdepth,
        expected,
        fs,
        fs_join,
        fs_glob_edge_cases_files,
        fs_target,
        fs_sanitize_path,
    ):
        """
        Copy one glob pattern and compare everything found in the target
        with the expected relative paths.

        Each parametrized case runs four times: into a new or an existing
        directory, each with and without a trailing slash on the target.
        """
        # Copy scenario 1g
        source = fs_glob_edge_cases_files
        target = fs_target

        for new_dir, target_slash in product((True, False), repeat=2):
            fs.mkdir(target)

            # Components that every expected path is prefixed with
            base_parts = (target, "newdir") if new_dir else (target,)
            t = fs_join(target, "newdir") if new_dir else target
            if target_slash:
                t = t + "/"

            fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)

            output = fs.find(target)
            prefixed_expected = [
                fs_sanitize_path(fs_join(*base_parts, p)) for p in expected
            ]
            assert sorted(output) == sorted(prefixed_expected)

            # Best-effort reset for the next combination: a missing target
            # is tolerated here.
            try:
                fs.rm(target, recursive=True)
            except FileNotFoundError:
                pass

    def test_copy_list_of_files_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        """Copy a list of files into an existing directory, without and with a trailing slash."""
        # Copy scenario 2a
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)
        dummy = fs_join(target, "dummy")
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(dummy)
        assert fs.isdir(target)

        # Listing of the target whenever it holds no copied content
        pristine = [] if supports_empty_directories else [dummy]

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]
        copies = [
            fs_join(target, "file1"),
            fs_join(target, "file2"),
            fs_join(target, "subfile1"),
        ]

        for target_slash in (False, True):
            if target_slash:
                t = target + "/"
            else:
                t = target

            fs.cp(source_files, t)
            for copied in copies:
                assert fs.isfile(copied)

            fs.rm(list(copies), recursive=True)
            assert fs.ls(target, detail=False) == pristine

    def test_copy_list_of_files_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        """Copy a list of files to a not-yet-existing directory named with a trailing slash."""
        # Copy scenario 2b
        source = fs_bulk_operations_scenario_0
        target = fs_target
        fs.mkdir(target)

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]
        destination = fs_join(target, "newdir") + "/"  # Note trailing slash
        fs.cp(source_files, destination)

        assert fs.isdir(fs_join(target, "newdir"))
        for name in ("file1", "file2", "subfile1"):
            assert fs.isfile(fs_join(target, "newdir", name))

    def test_copy_two_files_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        """Copy two files, given as a list, to a target that does not exist yet."""
        # This is a duplicate of test_copy_list_of_files_to_new_directory and
        # can eventually be removed.
        source = fs_bulk_operations_scenario_0
        target = fs_target
        assert not fs.exists(target)

        pair = [fs_join(source, "file1"), fs_join(source, "file2")]
        fs.cp(pair, target)

        assert fs.isdir(target)
        for name in ("file1", "file2"):
            assert fs.isfile(fs_join(target, name))

    def test_copy_directory_without_files_with_same_name_prefix(
        self,
        fs,
        fs_join,
        fs_target,
        fs_dir_and_file_with_same_name_prefix,
        supports_empty_directories,
    ):
        """
        Copy ``subdir`` without picking up its sibling ``subdir.txt``, then
        copy both of them through the ``subdir*`` glob.
        """
        # Create the test dirs
        source = fs_dir_and_file_with_same_name_prefix
        target = fs_target

        copied_subfile = fs_join(target, "subfile.txt")
        copied_prefix_twin = fs_join(target, "subdir.txt")

        # Test without glob
        fs.cp(fs_join(source, "subdir"), target, recursive=True)
        assert fs.isfile(copied_subfile)
        assert not fs.isfile(copied_prefix_twin)

        fs.rm([copied_subfile])
        if not supports_empty_directories:
            assert not fs.exists(target)
        else:
            assert fs.ls(target) == []

        # Test with glob
        fs.cp(fs_join(source, "subdir*"), target, recursive=True)
        assert fs.isdir(fs_join(target, "subdir"))
        assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
        assert fs.isfile(copied_prefix_twin)

    def test_copy_with_source_and_destination_as_list(
        self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
    ):
        """Copy list-to-list and check that the i-th destination holds the i-th content."""
        # Create the test dir
        source = fs_10_files_with_hashed_names
        target = fs_target

        # Create list of files for source and destination
        digests = [md5(str(i).encode("utf-8")).hexdigest() for i in range(10)]
        source_files = [fs_join(source, f"{digest}.txt") for digest in digests]
        destination_files = [fs_join(target, f"{digest}.txt") for digest in digests]

        # Copy and assert order was kept
        fs.copy(path1=source_files, path2=destination_files)

        for i, destination in enumerate(destination_files):
            file_content = fs.cat(destination).decode("utf-8")
            assert file_content == str(i)

View File

@ -0,0 +1,587 @@
from hashlib import md5
from itertools import product
import pytest
from fsspec.implementations.local import make_path_posix
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
class AbstractGetTests:
def test_get_file_to_existing_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """Get single files into an existing local directory, without and with a trailing slash."""
    # Copy scenario 1a
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)
    assert local_fs.isdir(target)

    fetched_file2 = local_join(target, "file2")
    fetched_subfile1 = local_join(target, "subfile1")

    # First round: file from the source directory itself ...
    fs.get(fs_join(source, "file2"), target)
    assert local_fs.isfile(fetched_file2)
    # ... and file from a sub directory
    fs.get(fs_join(source, "subdir", "subfile1"), target)
    assert local_fs.isfile(fetched_subfile1)

    # Clear the copies before the second round
    local_fs.rm([fetched_file2, fetched_subfile1])
    for removed in (fetched_file2, fetched_subfile1):
        assert not local_fs.exists(removed)

    # Second round: target spelled with a trailing slash
    fs.get(fs_join(source, "file2"), target + "/")
    assert local_fs.isdir(target)
    assert local_fs.isfile(fetched_file2)
    fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
    assert local_fs.isfile(fetched_subfile1)
def test_get_file_to_new_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """Get a file into a not-yet-existing local directory named with a trailing slash."""
    # Copy scenario 1b
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)

    origin = fs_join(source, "subdir", "subfile1")
    destination = local_join(target, "newdir/")  # Note trailing slash
    fs.get(origin, destination)

    assert local_fs.isdir(target)
    assert local_fs.isdir(local_join(target, "newdir"))
    assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
def test_get_file_to_file_in_existing_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """Get a file under a new file name inside an existing local directory."""
    # Copy scenario 1c
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)

    origin = fs_join(source, "subdir", "subfile1")
    fs.get(origin, local_join(target, "newfile"))
    assert local_fs.isfile(local_join(target, "newfile"))
def test_get_file_to_file_in_new_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """Get a file under a new file name inside a not-yet-existing local directory."""
    # Copy scenario 1d
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)

    origin = fs_join(source, "subdir", "subfile1")
    destination = local_join(target, "newdir", "newfile")
    fs.get(origin, destination)

    assert local_fs.isdir(local_join(target, "newdir"))
    assert local_fs.isfile(local_join(target, "newdir", "newfile"))
def test_get_directory_to_existing_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """Get a directory into an existing local directory: plain, recursive, and depth-limited."""
    # Copy scenario 1e
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)
    assert local_fs.isdir(target)

    flat_subfile1 = local_join(target, "subfile1")
    flat_subfile2 = local_join(target, "subfile2")
    flat_nesteddir = local_join(target, "nesteddir")
    fetched_subdir = local_join(target, "subdir")

    # Source and target get a trailing slash together or not at all
    for source_slash, target_slash in ((False, False), (True, True)):
        s = fs_join(source, "subdir") + ("/" if source_slash else "")
        if target_slash:
            t = target + "/"
        else:
            t = target

        # Without recursive does nothing
        fs.get(s, t)
        assert local_fs.ls(target) == []

        # With recursive
        fs.get(s, t, recursive=True)
        if not source_slash:
            # The directory itself lands inside the target
            assert local_fs.isdir(fetched_subdir)
            assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
            assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
            assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
            assert local_fs.isfile(
                local_join(target, "subdir", "nesteddir", "nestedfile")
            )
            local_fs.rm(fetched_subdir, recursive=True)
        else:
            # Only the directory's contents land inside the target
            assert local_fs.isfile(flat_subfile1)
            assert local_fs.isfile(flat_subfile2)
            assert local_fs.isdir(flat_nesteddir)
            assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
            assert not local_fs.exists(fetched_subdir)
            local_fs.rm(
                [flat_subfile1, flat_subfile2, flat_nesteddir],
                recursive=True,
            )
        assert local_fs.ls(target) == []

        # Limit recursive by maxdepth
        fs.get(s, t, recursive=True, maxdepth=1)
        if not source_slash:
            assert local_fs.isdir(fetched_subdir)
            assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
            assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
            assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
            local_fs.rm(fetched_subdir, recursive=True)
        else:
            assert local_fs.isfile(flat_subfile1)
            assert local_fs.isfile(flat_subfile2)
            assert not local_fs.exists(flat_nesteddir)
            assert not local_fs.exists(fetched_subdir)
            local_fs.rm([flat_subfile1, flat_subfile2], recursive=True)
        assert local_fs.ls(target) == []
def test_get_directory_to_new_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """Get a directory into a not-yet-existing local directory: plain, recursive, and depth-limited."""
    # Copy scenario 1f
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)

    newdir = local_join(target, "newdir")
    stray_subdir = local_join(target, "subdir")

    # Source and target get a trailing slash together or not at all
    for source_slash, target_slash in ((False, False), (True, True)):
        s = fs_join(source, "subdir") + ("/" if source_slash else "")
        t = local_join(target, "newdir") + ("/" if target_slash else "")

        # Without recursive does nothing
        fs.get(s, t)
        assert local_fs.ls(target) == []

        # With recursive
        fs.get(s, t, recursive=True)
        assert local_fs.isdir(newdir)
        assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
        assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
        assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
        assert local_fs.isfile(
            local_join(target, "newdir", "nesteddir", "nestedfile")
        )
        assert not local_fs.exists(stray_subdir)
        local_fs.rm(newdir, recursive=True)
        assert local_fs.ls(target) == []

        # Limit recursive by maxdepth
        fs.get(s, t, recursive=True, maxdepth=1)
        assert local_fs.isdir(newdir)
        assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
        assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
        assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
        assert not local_fs.exists(stray_subdir)
        local_fs.rm(newdir, recursive=True)
        assert not local_fs.exists(newdir)
def test_get_glob_to_existing_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """
    Copy scenario 1g: get the glob-matched contents of a remote directory
    into a local directory that already exists.

    Runs with and without a trailing slash on the target.
    """
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)

    # Local paths that the assertions below inspect.
    subfile1 = local_join(target, "subfile1")
    subfile2 = local_join(target, "subfile2")
    nesteddir = local_join(target, "nesteddir")
    nestedfile = local_join(target, "nesteddir", "nestedfile")
    subdir = local_join(target, "subdir")

    for target_slash in (False, True):
        dest = target + "/" if target_slash else target

        # Without recursive: only the files directly inside "subdir"
        fs.get(fs_join(source, "subdir", "*"), dest)
        assert local_fs.isfile(subfile1)
        assert local_fs.isfile(subfile2)
        assert not local_fs.isdir(nesteddir)
        assert not local_fs.exists(nestedfile)
        assert not local_fs.exists(subdir)

        local_fs.rm([subfile1, subfile2], recursive=True)
        assert local_fs.ls(target) == []

        # With recursive: "*" with recursive=True and "**" with
        # recursive=False are both expected to copy the nested directory.
        for pattern, recurse in (("*", True), ("**", False)):
            remote_glob = fs_join(source, "subdir", pattern)

            fs.get(remote_glob, dest, recursive=recurse)
            assert local_fs.isfile(subfile1)
            assert local_fs.isfile(subfile2)
            assert local_fs.isdir(nesteddir)
            assert local_fs.isfile(nestedfile)
            assert not local_fs.exists(subdir)

            local_fs.rm([subfile1, subfile2, nesteddir], recursive=True)
            assert local_fs.ls(target) == []

            # Limit recursive by maxdepth
            fs.get(remote_glob, dest, recursive=recurse, maxdepth=1)
            assert local_fs.isfile(subfile1)
            assert local_fs.isfile(subfile2)
            assert not local_fs.exists(nesteddir)
            assert not local_fs.exists(subdir)

            local_fs.rm([subfile1, subfile2], recursive=True)
            assert local_fs.ls(target) == []
def test_get_glob_to_new_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """
    Copy scenario 1h: get the glob-matched contents of a remote directory
    into a local directory that does not exist yet.

    Runs with and without a trailing slash on the target.
    """
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)

    for target_slash in [False, True]:
        # The target is a local path, so it is built with local_join (not
        # the remote filesystem's fs_join), like every other local path
        # in this test.
        t = local_join(target, "newdir")
        if target_slash:
            t += "/"

        # Without recursive
        fs.get(fs_join(source, "subdir", "*"), t)
        assert local_fs.isdir(local_join(target, "newdir"))
        assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
        assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
        assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
        assert not local_fs.exists(
            local_join(target, "newdir", "nesteddir", "nestedfile")
        )
        assert not local_fs.exists(local_join(target, "subdir"))
        assert not local_fs.exists(local_join(target, "newdir", "subdir"))

        local_fs.rm(local_join(target, "newdir"), recursive=True)
        assert local_fs.ls(target) == []

        # With recursive: "*" with recursive=True and "**" with
        # recursive=False are both expected to copy the nested directory.
        for glob, recursive in zip(["*", "**"], [True, False]):
            fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
            assert local_fs.isdir(local_join(target, "newdir"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
            assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
            assert local_fs.isfile(
                local_join(target, "newdir", "nesteddir", "nestedfile")
            )
            assert not local_fs.exists(local_join(target, "subdir"))
            assert not local_fs.exists(local_join(target, "newdir", "subdir"))

            local_fs.rm(local_join(target, "newdir"), recursive=True)
            assert not local_fs.exists(local_join(target, "newdir"))

            # Limit recursive by maxdepth
            fs.get(
                fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
            )
            assert local_fs.isdir(local_join(target, "newdir"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
            assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
            assert not local_fs.exists(local_join(target, "subdir"))
            assert not local_fs.exists(local_join(target, "newdir", "subdir"))

            # Remove everything that ended up directly under the target.
            local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
            assert not local_fs.exists(local_join(target, "newdir"))
@pytest.mark.parametrize(
    GLOB_EDGE_CASES_TESTS["argnames"],
    GLOB_EDGE_CASES_TESTS["argvalues"],
)
def test_get_glob_edge_cases(
    self,
    path,
    recursive,
    maxdepth,
    expected,
    fs,
    fs_join,
    fs_glob_edge_cases_files,
    local_fs,
    local_join,
    local_target,
):
    """
    Copy scenario 1g, parametrized over the shared glob edge cases.

    Each case is run for every combination of new versus existing target
    directory and target with versus without a trailing slash.
    """
    source = fs_glob_edge_cases_files
    target = local_target

    for new_dir, target_slash in product([True, False], [True, False]):
        local_fs.mkdir(target)

        dest = local_join(target, "newdir") if new_dir else target
        if target_slash:
            dest = dest + "/"

        fs.get(
            fs_join(source, path), dest, recursive=recursive, maxdepth=maxdepth
        )

        found = local_fs.find(target)

        # Expected paths are relative; anchor them where the files landed.
        base = (target, "newdir") if new_dir else (target,)
        wanted = [make_path_posix(local_join(*base, p)) for p in expected]
        assert sorted(found) == sorted(wanted)

        # The target may already be gone; a missing target is tolerated.
        try:
            local_fs.rm(target, recursive=True)
        except FileNotFoundError:
            pass
def test_get_list_of_files_to_existing_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """
    Copy scenario 2a: get a list of remote files into a local directory
    that already exists, with and without a trailing slash on the target.
    """
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)

    remote_files = [
        fs_join(source, "file1"),
        fs_join(source, "file2"),
        fs_join(source, "subdir", "subfile1"),
    ]
    # All three files are expected directly under the target.
    copied = [local_join(target, name) for name in ("file1", "file2", "subfile1")]

    for target_slash in (False, True):
        dest = target + "/" if target_slash else target

        fs.get(remote_files, dest)
        for local_file in copied:
            assert local_fs.isfile(local_file)

        local_fs.rm(list(copied), recursive=True)
        assert local_fs.ls(target) == []
def test_get_list_of_files_to_new_directory(
    self,
    fs,
    fs_join,
    fs_bulk_operations_scenario_0,
    local_fs,
    local_join,
    local_target,
):
    """
    Copy scenario 2b: get a list of remote files into a local directory
    that does not exist yet (target given with a trailing slash).
    """
    source = fs_bulk_operations_scenario_0
    target = local_target
    local_fs.mkdir(target)

    remote_files = [
        fs_join(source, "file1"),
        fs_join(source, "file2"),
        fs_join(source, "subdir", "subfile1"),
    ]

    # Note trailing slash
    new_target = local_join(target, "newdir") + "/"
    fs.get(remote_files, new_target)

    assert local_fs.isdir(local_join(target, "newdir"))
    for name in ("file1", "file2", "subfile1"):
        assert local_fs.isfile(local_join(target, "newdir", name))
def test_get_directory_recursive(
    self, fs, fs_join, fs_path, local_fs, local_join, local_target
):
    """
    Recursive get of a source directory into a non-existent target
    directory, with and without a trailing slash on the source.

    See https://github.com/fsspec/filesystem_spec/issues/1062
    """
    # Remote source: a directory holding a single empty file.
    src = fs_join(fs_path, "src")
    fs.mkdir(src)
    fs.touch(fs_join(src, "file"))

    target = local_target
    copied_file = local_join(target, "file")
    copied_src = local_join(target, "src")

    # get without slash
    assert not local_fs.exists(target)
    for attempt in range(2):
        fs.get(src, target, recursive=True)
        assert local_fs.isdir(target)
        assert local_fs.isfile(copied_file)

        if attempt == 0:
            # First get: target did not exist, contents land in it directly.
            assert not local_fs.exists(copied_src)
        else:
            # Second get: target now exists, so "src" is expected inside it.
            assert local_fs.isdir(copied_src)
            assert local_fs.isfile(local_join(target, "src", "file"))

    local_fs.rm(target, recursive=True)

    # get with slash: both passes expect the contents directly in target
    assert not local_fs.exists(target)
    for _ in range(2):
        fs.get(src + "/", target, recursive=True)
        assert local_fs.isdir(target)
        assert local_fs.isfile(copied_file)
        assert not local_fs.exists(copied_src)
def test_get_directory_without_files_with_same_name_prefix(
    self,
    fs,
    fs_join,
    local_fs,
    local_join,
    local_target,
    fs_dir_and_file_with_same_name_prefix,
):
    """
    Getting a directory must not also pick up a sibling file whose name
    merely starts with the directory's name; a glob on that prefix must
    pick up both.
    """
    source = fs_dir_and_file_with_same_name_prefix
    target = local_target

    # Test without glob: only the contents of "subdir" are expected
    copied_subfile = local_join(target, "subfile.txt")
    fs.get(fs_join(source, "subdir"), target, recursive=True)
    assert local_fs.isfile(copied_subfile)
    assert not local_fs.isfile(local_join(target, "subdir.txt"))

    local_fs.rm([copied_subfile])
    assert local_fs.ls(target) == []

    # Test with glob: "subdir*" matches the directory and the file
    fs.get(fs_join(source, "subdir*"), target, recursive=True)
    assert local_fs.isdir(local_join(target, "subdir"))
    assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
    assert local_fs.isfile(local_join(target, "subdir.txt"))
def test_get_with_source_and_destination_as_list(
    self,
    fs,
    fs_join,
    local_fs,
    local_join,
    local_target,
    fs_10_files_with_hashed_names,
):
    """
    Get with both source and destination given as lists, and check that
    the i-th source ends up at the i-th destination.
    """
    source = fs_10_files_with_hashed_names
    target = local_target

    # File names are the md5 hex digest of the index, plus ".txt".
    names = [md5(str(i).encode("utf-8")).hexdigest() + ".txt" for i in range(10)]
    source_files = [fs_join(source, name) for name in names]
    destination_files = [
        make_path_posix(local_join(target, name)) for name in names
    ]

    # Copy and assert order was kept: file i is expected to contain str(i)
    fs.get(rpath=source_files, lpath=destination_files)

    for i, destination in enumerate(destination_files):
        assert local_fs.cat(destination).decode("utf-8") == str(i)

View File

@ -0,0 +1,57 @@
import os
import pytest
import fsspec
def test_move_raises_error_with_tmpdir(tmpdir):
    """Moving a file into a directory that does not exist must raise."""
    # A real source file inside the temporary directory.
    src = tmpdir.join("source_file.txt")
    src.write("content")

    # The destination's parent directory is never created.
    dst = tmpdir.join("non_existent_directory/destination_file.txt")

    local = fsspec.filesystem("file")

    # Paths are passed as plain strings.
    with pytest.raises(FileNotFoundError):
        local.mv(str(src), str(dst))
@pytest.mark.parametrize("recursive", (True, False))
def test_move_raises_error_with_tmpdir_permission(recursive, tmpdir):
    """
    Moving a file into a read-only directory must raise PermissionError
    and must not create the destination file.

    The directory's permissions are restored in a ``finally`` block so that
    a failing expectation cannot leave a read-only directory behind.
    """
    # Create a file in the temporary directory
    source = tmpdir.join("source_file.txt")
    source.write("content")

    # Create a protected directory (non-writable)
    protected_dir = tmpdir.mkdir("protected_directory")
    protected_path = str(protected_dir)

    # Set the directory to read-only
    if os.name == "nt":
        os.system(f'icacls "{protected_path}" /deny Everyone:(W)')
    else:
        os.chmod(protected_path, 0o555)  # Sets the directory to read-only

    try:
        # Define a destination inside the protected directory
        destination = protected_dir.join("destination_file.txt")

        # Instantiate the filesystem (assuming the local file system interface)
        fs = fsspec.filesystem("file")

        # Try to move the file to the read-only directory, expecting a
        # permission error
        with pytest.raises(PermissionError):
            fs.mv(str(source), str(destination), recursive=recursive)

        # Assert the file was not created in the destination
        assert not os.path.exists(destination)
    finally:
        # Cleanup: always restore permissions so the directory can be removed
        if os.name == "nt":
            os.system(f'icacls "{protected_path}" /remove:d Everyone')
        else:
            os.chmod(protected_path, 0o755)  # Restore write permission

View File

@ -0,0 +1,591 @@
from hashlib import md5
from itertools import product
import pytest
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
class AbstractPutTests:
    """
    Abstract tests for ``fs.put`` from local paths to the filesystem under
    test.

    Every test receives its filesystem, path-joining helpers and source
    scenarios as pytest fixtures. Local source paths are built with
    ``local_join`` and target paths with ``fs_join``.
    """

    def test_put_file_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_join,
        local_bulk_operations_scenario_0,
        supports_empty_directories,
    ):
        """Copy scenario 1a: put single files into an existing directory."""
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        target_file2 = fs_join(target, "file2")
        target_subfile1 = fs_join(target, "subfile1")

        # Copy from source directory
        fs.put(local_join(source, "file2"), target)
        assert fs.isfile(target_file2)

        # Copy from sub directory
        fs.put(local_join(source, "subdir", "subfile1"), target)
        assert fs.isfile(target_subfile1)

        # Remove copied files
        fs.rm([target_file2, target_subfile1])
        assert not fs.exists(target_file2)
        assert not fs.exists(target_subfile1)

        # Repeat with trailing slash on target
        fs.put(local_join(source, "file2"), target + "/")
        assert fs.isdir(target)
        assert fs.isfile(target_file2)

        fs.put(local_join(source, "subdir", "subfile1"), target + "/")
        assert fs.isfile(target_subfile1)

    def test_put_file_to_new_directory(
        self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
    ):
        """Copy scenario 1b: put a single file into a new directory."""
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.put(
            local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
        )  # Note trailing slash

        assert fs.isdir(target)
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_put_file_to_file_in_existing_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_join,
        supports_empty_directories,
        local_bulk_operations_scenario_0,
    ):
        """Copy scenario 1c: put a file to a new file name in an existing directory."""
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
        assert fs.isfile(fs_join(target, "newfile"))

    def test_put_file_to_file_in_new_directory(
        self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
    ):
        """Copy scenario 1d: put a file to a new file name in a new directory."""
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.put(
            local_join(source, "subdir", "subfile1"),
            fs_join(target, "newdir", "newfile"),
        )
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "newfile"))

    def test_put_directory_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_join,
        local_bulk_operations_scenario_0,
        supports_empty_directories,
    ):
        """
        Copy scenario 1e: put a directory into an existing directory.

        Runs once with no trailing slashes and once with a trailing slash on
        both source and target.
        """
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            # The source is a local path, so it is built with local_join
            # rather than the target filesystem's fs_join.
            s = local_join(source, "subdir")
            if source_slash:
                s += "/"
            t = target + "/" if target_slash else target

            # Without recursive does nothing
            fs.put(s, t)
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

            # With recursive
            fs.put(s, t, recursive=True)
            if source_slash:
                # Trailing slash on source: contents land directly in target
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert fs.isdir(fs_join(target, "nesteddir"))
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                        fs_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
            else:
                # No trailing slash: the directory itself lands in target
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

            # Limit recursive by maxdepth
            fs.put(s, t, recursive=True, maxdepth=1)
            if source_slash:
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert not fs.exists(fs_join(target, "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                    ],
                    recursive=True,
                )
            else:
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert not fs.exists(fs_join(target, "subdir", "nesteddir"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

    def test_put_directory_to_new_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_join,
        local_bulk_operations_scenario_0,
        supports_empty_directories,
    ):
        """
        Copy scenario 1f: put a directory into a directory that does not
        exist yet.

        Runs once with no trailing slashes and once with a trailing slash on
        both source and target.
        """
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            # The source is a local path, so it is built with local_join
            # rather than the target filesystem's fs_join.
            s = local_join(source, "subdir")
            if source_slash:
                s += "/"
            t = fs_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive does nothing
            fs.put(s, t)
            if supports_empty_directories:
                assert fs.ls(target) == []
            else:
                with pytest.raises(FileNotFoundError):
                    fs.ls(target)

            # With recursive
            fs.put(s, t, recursive=True)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
            assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

            # Limit recursive by maxdepth
            fs.put(s, t, recursive=True, maxdepth=1)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

    def test_put_glob_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_join,
        supports_empty_directories,
        local_bulk_operations_scenario_0,
    ):
        """
        Copy scenario 1g: put the glob-matched contents of a local directory
        into an existing directory, with and without a trailing slash on the
        target.
        """
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            # Without recursive
            fs.put(local_join(source, "subdir", "*"), t)
            assert fs.isfile(fs_join(target, "subfile1"))
            assert fs.isfile(fs_join(target, "subfile2"))
            assert not fs.isdir(fs_join(target, "nesteddir"))
            assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(
                [
                    fs_join(target, "subfile1"),
                    fs_join(target, "subfile2"),
                ],
                recursive=True,
            )
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

            # With recursive: "*" with recursive=True and "**" with
            # recursive=False are both expected to copy the nested directory.
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert fs.isdir(fs_join(target, "nesteddir"))
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                        fs_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
                assert fs.ls(target, detail=False) == (
                    [] if supports_empty_directories else [dummy]
                )

                # Limit recursive by maxdepth
                fs.put(
                    local_join(source, "subdir", glob),
                    t,
                    recursive=recursive,
                    maxdepth=1,
                )
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert not fs.exists(fs_join(target, "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                    ],
                    recursive=True,
                )
                assert fs.ls(target, detail=False) == (
                    [] if supports_empty_directories else [dummy]
                )

    def test_put_glob_to_new_directory(
        self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
    ):
        """
        Copy scenario 1h: put the glob-matched contents of a local directory
        into a directory that does not exist yet, with and without a
        trailing slash on the target.
        """
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        for target_slash in [False, True]:
            t = fs_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive
            fs.put(local_join(source, "subdir", "*"), t)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))
            assert not fs.exists(fs_join(target, "newdir", "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

            # With recursive: "*" with recursive=True and "**" with
            # recursive=False are both expected to copy the nested directory.
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
                assert fs.isdir(fs_join(target, "newdir"))
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))
                assert not fs.exists(fs_join(target, "newdir", "subdir"))

                fs.rm(fs_join(target, "newdir"), recursive=True)
                assert not fs.exists(fs_join(target, "newdir"))

                # Limit recursive by maxdepth
                fs.put(
                    local_join(source, "subdir", glob),
                    t,
                    recursive=recursive,
                    maxdepth=1,
                )
                assert fs.isdir(fs_join(target, "newdir"))
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))
                assert not fs.exists(fs_join(target, "newdir", "subdir"))

                fs.rm(fs_join(target, "newdir"), recursive=True)
                assert not fs.exists(fs_join(target, "newdir"))

    @pytest.mark.parametrize(
        GLOB_EDGE_CASES_TESTS["argnames"],
        GLOB_EDGE_CASES_TESTS["argvalues"],
    )
    def test_put_glob_edge_cases(
        self,
        path,
        recursive,
        maxdepth,
        expected,
        fs,
        fs_join,
        fs_target,
        local_glob_edge_cases_files,
        local_join,
        fs_sanitize_path,
    ):
        """
        Copy scenario 1g, parametrized over the shared glob edge cases.

        Each case is run for every combination of new versus existing target
        directory and target with versus without a trailing slash.
        """
        source = local_glob_edge_cases_files

        target = fs_target

        for new_dir, target_slash in product([True, False], [True, False]):
            fs.mkdir(target)

            t = fs_join(target, "newdir") if new_dir else target
            t = t + "/" if target_slash else t

            fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth)

            output = fs.find(target)
            # Expected paths are relative; anchor them where the files landed.
            if new_dir:
                prefixed_expected = [
                    fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
                ]
            else:
                prefixed_expected = [
                    fs_sanitize_path(fs_join(target, p)) for p in expected
                ]
            assert sorted(output) == sorted(prefixed_expected)

            # The target may already be gone; a missing target is tolerated.
            try:
                fs.rm(target, recursive=True)
            except FileNotFoundError:
                pass

    def test_put_list_of_files_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_join,
        local_bulk_operations_scenario_0,
        supports_empty_directories,
    ):
        """
        Copy scenario 2a: put a list of local files into an existing
        directory, with and without a trailing slash on the target.
        """
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        source_files = [
            local_join(source, "file1"),
            local_join(source, "file2"),
            local_join(source, "subdir", "subfile1"),
        ]

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            fs.put(source_files, t)
            assert fs.isfile(fs_join(target, "file1"))
            assert fs.isfile(fs_join(target, "file2"))
            assert fs.isfile(fs_join(target, "subfile1"))

            fs.rm(
                [
                    fs_join(target, "file1"),
                    fs_join(target, "file2"),
                    fs_join(target, "subfile1"),
                ],
                recursive=True,
            )
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

    def test_put_list_of_files_to_new_directory(
        self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
    ):
        """
        Copy scenario 2b: put a list of local files into a directory that
        does not exist yet (target given with a trailing slash).
        """
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        source_files = [
            local_join(source, "file1"),
            local_join(source, "file2"),
            local_join(source, "subdir", "subfile1"),
        ]

        fs.put(source_files, fs_join(target, "newdir") + "/")  # Note trailing slash
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "file1"))
        assert fs.isfile(fs_join(target, "newdir", "file2"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_put_directory_recursive(
        self, fs, fs_join, fs_target, local_fs, local_join, local_path
    ):
        """
        Recursive put of a source directory into a non-existent target
        directory, with and without a trailing slash on the source.

        See https://github.com/fsspec/filesystem_spec/issues/1062
        """
        src = local_join(local_path, "src")
        src_file = local_join(src, "file")
        local_fs.mkdir(src)
        local_fs.touch(src_file)

        target = fs_target

        # put without slash
        assert not fs.exists(target)
        for loop in range(2):
            fs.put(src, target, recursive=True)
            assert fs.isdir(target)

            if loop == 0:
                # First put: target did not exist, contents land in it directly
                assert fs.isfile(fs_join(target, "file"))
                assert not fs.exists(fs_join(target, "src"))
            else:
                # Second put: target now exists, so "src" is expected inside it
                assert fs.isfile(fs_join(target, "file"))
                assert fs.isdir(fs_join(target, "src"))
                assert fs.isfile(fs_join(target, "src", "file"))

        fs.rm(target, recursive=True)

        # put with slash
        assert not fs.exists(target)
        for loop in range(2):
            fs.put(src + "/", target, recursive=True)
            assert fs.isdir(target)
            assert fs.isfile(fs_join(target, "file"))
            assert not fs.exists(fs_join(target, "src"))

    def test_put_directory_without_files_with_same_name_prefix(
        self,
        fs,
        fs_join,
        fs_target,
        local_join,
        local_dir_and_file_with_same_name_prefix,
        supports_empty_directories,
    ):
        """
        Putting a directory must not also pick up a sibling file whose name
        merely starts with the directory's name; a glob on that prefix must
        pick up both.
        """
        # Create the test dirs
        source = local_dir_and_file_with_same_name_prefix
        target = fs_target

        # Test without glob
        fs.put(local_join(source, "subdir"), target, recursive=True)

        assert fs.isfile(fs_join(target, "subfile.txt"))
        assert not fs.isfile(fs_join(target, "subdir.txt"))

        fs.rm([fs_join(target, "subfile.txt")])
        if supports_empty_directories:
            assert fs.ls(target) == []
        else:
            assert not fs.exists(target)

        # Test with glob
        fs.put(local_join(source, "subdir*"), target, recursive=True)

        assert fs.isdir(fs_join(target, "subdir"))
        assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
        assert fs.isfile(fs_join(target, "subdir.txt"))

    def test_copy_with_source_and_destination_as_list(
        self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names
    ):
        """
        Put with both source and destination given as lists, and check that
        the i-th source ends up at the i-th destination.
        """
        # Create the test dir
        source = local_10_files_with_hashed_names
        target = fs_target

        # Create list of files for source and destination
        source_files = []
        destination_files = []
        for i in range(10):
            # File names are the md5 hex digest of the index, plus ".txt"
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            source_files.append(local_join(source, f"{hashed_i}.txt"))
            destination_files.append(fs_join(target, f"{hashed_i}.txt"))

        # Copy and assert order was kept
        fs.put(lpath=source_files, rpath=destination_files)

        for i in range(10):
            file_content = fs.cat(destination_files[i]).decode("utf-8")
            assert file_content == str(i)