From 341bf7a84bc59677ad3f0a25a05e2d048302aca0 Mon Sep 17 00:00:00 2001 From: atollk Date: Fri, 19 Mar 2021 22:21:49 +0100 Subject: [PATCH 1/9] Added _copy_if functions to fs.copy module. The free functions copy_file, copy_file_if_newer, copy_dir, copy_dir_if_newer, copy_fs, and copy_fs_if_newer now are all implemented in terms of copy_file_if, copy_dir_if, and copy_fs_if. Unit Tests for this change will be created in a future commit. This change streamlined the code by removing duplication in logic and clustering similar logic closer together. Also, though this has not been tested, copy_dir_if_newer should be faster in this implementation when copying to a dst_dir that contains a lot of files that are not present in the respective src_dir. Finally, a bug was fixed that was caused by a false lookup in the dictionary of file infos from dst_fs. This fix could cause unnecessary copies and therefore a decrease in performance. --- fs/copy.py | 417 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 261 insertions(+), 156 deletions(-) diff --git a/fs/copy.py b/fs/copy.py index 6cd34392..3f949cc3 100644 --- a/fs/copy.py +++ b/fs/copy.py @@ -5,7 +5,7 @@ import typing -from .errors import FSError +from .errors import ResourceNotFound from .opener import manage_fs from .path import abspath, combine, frombase, normpath from .tools import is_thread_safe @@ -41,9 +41,7 @@ def copy_fs( a single-threaded copy. """ - return copy_dir( - src_fs, "/", dst_fs, "/", walker=walker, on_copy=on_copy, workers=workers - ) + return copy_fs_if(src_fs, dst_fs, "always", walker, on_copy, workers) def copy_fs_if_newer( @@ -74,38 +72,61 @@ def copy_fs_if_newer( a single-threaded copy. """ - return copy_dir_if_newer( - src_fs, "/", dst_fs, "/", walker=walker, on_copy=on_copy, workers=workers - ) + return copy_fs_if(src_fs, dst_fs, "newer", walker, on_copy, workers) -def _source_is_newer(src_fs, src_path, dst_fs, dst_path): - # type: (FS, Text, FS, Text) -> bool - """Determine if source file is newer than destination file. +def copy_fs_if( + src_fs, # type: Union[FS, Text] + dst_fs, # type: Union[FS, Text] + condition="always", # type: Text + walker=None, # type: Optional[Walker] + on_copy=None, # type: Optional[_OnCopy] + workers=0, # type: int +): + # type: (...) -> None + """Copy the contents of one filesystem to another, depending on a condition. - Arguments: - src_fs (FS): Source filesystem (instance or URL). - src_path (str): Path to a file on the source filesystem. - dst_fs (FS): Destination filesystem (instance or URL). - dst_path (str): Path to a file on the destination filesystem. + Depending on the value of ``strategy``, certain conditions must be fulfilled + for a file to be copied to ``dst_fs``. - Returns: - bool: `True` if the source file is newer than the destination - file or file modification time cannot be determined, `False` - otherwise. + If ``condition`` has the value ``"newer"``, the last modification time + of the source file must be newer than that of the destination file. + If either file has no modification time, the copy is performed always. + + If ``condition`` has the value ``"older"``, the last modification time + of the source file must be older than that of the destination file. + If either file has no modification time, the copy is performed always. + + If ``condition`` has the value ``"exists"``, the source file is only + copied if a file of the same path already exists in ``dst_fs``. + + If ``condition`` has the value ``"not_exists"``, the source file is only + copied if no file of the same path already exists in ``dst_fs``. + + Arguments: + src_fs (FS or str): Source filesystem (URL or instance). + dst_fs (FS or str): Destination filesystem (URL or instance). + condition (str): Name of the condition to check for each file. + walker (~fs.walk.Walker, optional): A walker object that will be + used to scan for files in ``src_fs``. Set this if you only want + to consider a sub-set of the resources in ``src_fs``. + on_copy (callable):A function callback called after a single file copy + is executed. Expected signature is ``(src_fs, src_path, dst_fs, + dst_path)``. + workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for + a single-threaded copy. """ - try: - if dst_fs.exists(dst_path): - namespace = ("details", "modified") - src_modified = src_fs.getinfo(src_path, namespace).modified - if src_modified is not None: - dst_modified = dst_fs.getinfo(dst_path, namespace).modified - return dst_modified is None or src_modified > dst_modified - return True - except FSError: # pragma: no cover - # todo: should log something here - return True + return copy_dir_if( + src_fs, + "/", + dst_fs, + "/", + condition, + walker=walker, + on_copy=on_copy, + workers=workers, + ) def copy_file( @@ -126,76 +147,70 @@ def copy_file( dst_path (str): Path to a file on the destination filesystem. """ - with manage_fs(src_fs, writeable=False) as _src_fs: - with manage_fs(dst_fs, create=True) as _dst_fs: - if _src_fs is _dst_fs: - # Same filesystem, so we can do a potentially optimized - # copy - _src_fs.copy(src_path, dst_path, overwrite=True) - else: - # Standard copy - with _src_fs.lock(), _dst_fs.lock(): - if _dst_fs.hassyspath(dst_path): - with _dst_fs.openbin(dst_path, "w") as write_file: - _src_fs.download(src_path, write_file) - else: - with _src_fs.openbin(src_path) as read_file: - _dst_fs.upload(dst_path, read_file) + copy_file_if(src_fs, src_path, dst_fs, dst_path, "always") -def copy_file_internal( - src_fs, # type: FS +def copy_file_if_newer( + src_fs, # type: Union[FS, Text] src_path, # type: Text - dst_fs, # type: FS + dst_fs, # type: Union[FS, Text] dst_path, # type: Text ): - # type: (...) -> None - """Copy a file at low level, without calling `manage_fs` or locking. + # type: (...) -> bool + """Copy a file from one filesystem to another, checking times. If the destination exists, and is a file, it will be first truncated. - - This method exists to optimize copying in loops. In general you - should prefer `copy_file`. + If both source and destination files exist, the copy is executed only + if the source file is newer than the destination file. In case + modification times of source or destination files are not available, + copy is always executed. Arguments: - src_fs (FS): Source filesystem. + src_fs (FS or str): Source filesystem (instance or URL). src_path (str): Path to a file on the source filesystem. - dst_fs (FS): Destination filesystem. + dst_fs (FS or str): Destination filesystem (instance or URL). dst_path (str): Path to a file on the destination filesystem. + Returns: + bool: `True` if the file copy was executed, `False` otherwise. + """ - if src_fs is dst_fs: - # Same filesystem, so we can do a potentially optimized - # copy - src_fs.copy(src_path, dst_path, overwrite=True) - elif dst_fs.hassyspath(dst_path): - with dst_fs.openbin(dst_path, "w") as write_file: - src_fs.download(src_path, write_file) - else: - with src_fs.openbin(src_path) as read_file: - dst_fs.upload(dst_path, read_file) + return copy_file_if(src_fs, src_path, dst_fs, dst_path, "newer") -def copy_file_if_newer( +def copy_file_if( src_fs, # type: Union[FS, Text] src_path, # type: Text dst_fs, # type: Union[FS, Text] dst_path, # type: Text + condition, # type: Text ): # type: (...) -> bool - """Copy a file from one filesystem to another, checking times. + """Copy a file from one filesystem to another, depending on a condition. - If the destination exists, and is a file, it will be first truncated. - If both source and destination files exist, the copy is executed only - if the source file is newer than the destination file. In case - modification times of source or destination files are not available, - copy is always executed. + Depending on the value of ``strategy``, certain conditions must be fulfilled + for a file to be copied to ``dst_fs``. + + If ``condition`` has the value ``"newer"``, the last modification time + of the source file must be newer than that of the destination file. + If either file has no modification time, the copy is performed always. + + If ``condition`` has the value ``"older"``, the last modification time + of the source file must be older than that of the destination file. + If either file has no modification time, the copy is performed always. + + If ``condition`` has the value ``"exists"``, the source file is only + copied if a file of the same path already exists in ``dst_fs``. + + If ``condition`` has the value ``"not_exists"``, the source file is only + copied if no file of the same path already exists in ``dst_fs``. Arguments: src_fs (FS or str): Source filesystem (instance or URL). src_path (str): Path to a file on the source filesystem. dst_fs (FS or str): Destination filesystem (instance or URL). dst_path (str): Path to a file on the destination filesystem. + condition (str): Name of the condition to check for each file. Returns: bool: `True` if the file copy was executed, `False` otherwise. @@ -203,28 +218,64 @@ def copy_file_if_newer( """ with manage_fs(src_fs, writeable=False) as _src_fs: with manage_fs(dst_fs, create=True) as _dst_fs: - if _src_fs is _dst_fs: - # Same filesystem, so we can do a potentially optimized - # copy - if _source_is_newer(_src_fs, src_path, _dst_fs, dst_path): - _src_fs.copy(src_path, dst_path, overwrite=True) - return True - else: - return False - else: - # Standard copy - with _src_fs.lock(), _dst_fs.lock(): - if _source_is_newer(_src_fs, src_path, _dst_fs, dst_path): - copy_file_internal(_src_fs, src_path, _dst_fs, dst_path) - return True - else: - return False + do_copy = _copy_is_necessary( + _src_fs, src_path, _dst_fs, dst_path, condition + ) + if do_copy: + copy_file_internal(_src_fs, src_path, _dst_fs, dst_path, True) + return do_copy + + +def copy_file_internal( + src_fs, # type: FS + src_path, # type: Text + dst_fs, # type: FS + dst_path, # type: Text + lock=False, # type: bool +): + # type: (...) -> None + """Copy a file at low level, without calling `manage_fs` or locking. + + If the destination exists, and is a file, it will be first truncated. + + This method exists to optimize copying in loops. In general you + should prefer `copy_file`. + + Arguments: + src_fs (FS): Source filesystem. + src_path (str): Path to a file on the source filesystem. + dst_fs (FS): Destination filesystem. + dst_path (str): Path to a file on the destination filesystem. + lock (bool): Lock both filesystems before copying. + + """ + if src_fs is dst_fs: + # Same filesystem, so we can do a potentially optimized + # copy + src_fs.copy(src_path, dst_path, overwrite=True) + return + + def _copy_locked(): + if dst_fs.hassyspath(dst_path): + with dst_fs.openbin(dst_path, "w") as write_file: + src_fs.download(src_path, write_file) + else: + with src_fs.openbin(src_path) as read_file: + dst_fs.upload(dst_path, read_file) + + if lock: + with src_fs.lock(), dst_fs.lock(): + _copy_locked() + else: + _copy_locked() def copy_structure( src_fs, # type: Union[FS, Text] dst_fs, # type: Union[FS, Text] walker=None, # type: Optional[Walker] + src_root="/", # type: Text + dst_root="/", # type: Text ): # type: (...) -> None """Copy directories (but not files) from ``src_fs`` to ``dst_fs``. @@ -235,14 +286,20 @@ def copy_structure( walker (~fs.walk.Walker, optional): A walker object that will be used to scan for files in ``src_fs``. Set this if you only want to consider a sub-set of the resources in ``src_fs``. + src_root (str): Path of the base directory to consider as the root + of the tree structure to copy. + dst_root (str): Path to the target root of the tree structure. """ walker = walker or Walker() with manage_fs(src_fs) as _src_fs: with manage_fs(dst_fs, create=True) as _dst_fs: with _src_fs.lock(), _dst_fs.lock(): - for dir_path in walker.dirs(_src_fs): - _dst_fs.makedir(dir_path, recreate=True) + _dst_fs.makedirs(dst_root, recreate=True) + for dir_path in walker.dirs(_src_fs, src_root): + _dst_fs.makedir( + combine(dst_root, frombase(src_root, dir_path)), recreate=True + ) def copy_dir( @@ -272,33 +329,7 @@ def copy_dir( a single-threaded copy. """ - on_copy = on_copy or (lambda *args: None) - walker = walker or Walker() - _src_path = abspath(normpath(src_path)) - _dst_path = abspath(normpath(dst_path)) - - def src(): - return manage_fs(src_fs, writeable=False) - - def dst(): - return manage_fs(dst_fs, create=True) - - from ._bulk import Copier - - with src() as _src_fs, dst() as _dst_fs: - with _src_fs.lock(), _dst_fs.lock(): - _thread_safe = is_thread_safe(_src_fs, _dst_fs) - with Copier(num_workers=workers if _thread_safe else 0) as copier: - _dst_fs.makedir(_dst_path, recreate=True) - for dir_path, dirs, files in walker.walk(_src_fs, _src_path): - copy_path = combine(_dst_path, frombase(_src_path, dir_path)) - for info in dirs: - _dst_fs.makedir(info.make_path(copy_path), recreate=True) - for info in files: - src_path = info.make_path(dir_path) - dst_path = info.make_path(copy_path) - copier.copy(_src_fs, src_path, _dst_fs, dst_path) - on_copy(_src_fs, src_path, _dst_fs, dst_path) + copy_dir_if(src_fs, src_path, dst_fs, dst_path, "always", walker, on_copy, workers) def copy_dir_if_newer( @@ -332,54 +363,128 @@ def copy_dir_if_newer( workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for a single-threaded copy. + """ + copy_dir_if(src_fs, src_path, dst_fs, dst_path, "newer", walker, on_copy, workers) + + +def copy_dir_if( + src_fs, # type: Union[FS, Text] + src_path, # type: Text + dst_fs, # type: Union[FS, Text] + dst_path, # type: Text + condition="always", # type: Text + walker=None, # type: Optional[Walker] + on_copy=None, # type: Optional[_OnCopy] + workers=0, # type: int +): + # type: (...) -> None + """Copy a directory from one filesystem to another, depending on a condition. + + Depending on the value of ``strategy``, certain conditions must be + fulfilled for a file to be copied to ``dst_fs``. + + If ``condition`` has the value ``"always"``, the source file is always + copied. + + If ``condition`` has the value ``"newer"``, the last modification time + of the source file must be newer than that of the destination file. + If either file has no modification time, the copy is performed always. + + If ``condition`` has the value ``"older"``, the last modification time + of the source file must be older than that of the destination file. + If either file has no modification time, the copy is performed always. + + If ``condition`` has the value ``"exists"``, the source file is only + copied if a file of the same path already exists in ``dst_fs``. + + If ``condition`` has the value ``"not_exists"``, the source file is only + copied if no file of the same path already exists in ``dst_fs``. + + Arguments: + src_fs (FS or str): Source filesystem (instance or URL). + src_path (str): Path to a directory on the source filesystem. + dst_fs (FS or str): Destination filesystem (instance or URL). + dst_path (str): Path to a directory on the destination filesystem. + condition (str): Name of the condition to check for each file. + walker (~fs.walk.Walker, optional): A walker object that will be + used to scan for files in ``src_fs``. Set this if you only want + to consider a sub-set of the resources in ``src_fs``. + on_copy (callable):A function callback called after a single file copy + is executed. Expected signature is ``(src_fs, src_path, dst_fs, + dst_path)``. + workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for + a single-threaded copy. + """ on_copy = on_copy or (lambda *args: None) walker = walker or Walker() _src_path = abspath(normpath(src_path)) _dst_path = abspath(normpath(dst_path)) - def src(): - return manage_fs(src_fs, writeable=False) - - def dst(): - return manage_fs(dst_fs, create=True) - from ._bulk import Copier - with src() as _src_fs, dst() as _dst_fs: + copy_structure(src_fs, dst_fs, walker, src_path, dst_path) + + with manage_fs(src_fs, writeable=False) as _src_fs, manage_fs( + dst_fs, create=True + ) as _dst_fs: with _src_fs.lock(), _dst_fs.lock(): _thread_safe = is_thread_safe(_src_fs, _dst_fs) with Copier(num_workers=workers if _thread_safe else 0) as copier: - _dst_fs.makedir(_dst_path, recreate=True) - namespace = ("details", "modified") - dst_state = { - path: info - for path, info in walker.info(_dst_fs, _dst_path, namespace) - if info.is_file - } - src_state = [ - (path, info) - for path, info in walker.info(_src_fs, _src_path, namespace) - ] - for dir_path, copy_info in src_state: + for dir_path in walker.files(_src_fs, _src_path): copy_path = combine(_dst_path, frombase(_src_path, dir_path)) - if copy_info.is_dir: - _dst_fs.makedir(copy_path, recreate=True) - elif copy_info.is_file: - # dst file is present, try to figure out if copy - # is necessary - try: - src_modified = copy_info.modified - dst_modified = dst_state[dir_path].modified - except KeyError: - do_copy = True - else: - do_copy = ( - src_modified is None - or dst_modified is None - or src_modified > dst_modified - ) - - if do_copy: - copier.copy(_src_fs, dir_path, _dst_fs, copy_path) - on_copy(_src_fs, dir_path, _dst_fs, copy_path) + if _copy_is_necessary( + _src_fs, dir_path, _dst_fs, copy_path, condition + ): + copier.copy(_src_fs, dir_path, _dst_fs, copy_path) + on_copy(_src_fs, dir_path, _dst_fs, copy_path) + + +def _copy_is_necessary( + src_fs, # type: FS + src_path, # type: Text + dst_fs, # type: FS + dst_path, # type: Text + condition, # type: Text +): + # type: (...) -> bool + + if condition == "always": + return True + + elif condition == "newer": + try: + namespace = ("details", "modified") + src_modified = src_fs.getinfo(src_path, namespace).modified + dst_modified = dst_fs.getinfo(dst_path, namespace).modified + except ResourceNotFound: + return True + else: + return ( + src_modified is None + or dst_modified is None + or src_modified > dst_modified + ) + + elif condition == "older": + try: + namespace = ("details", "modified") + src_modified = src_fs.getinfo(src_path, namespace).modified + dst_modified = dst_fs.getinfo(dst_path, namespace).modified + except ResourceNotFound: + return True + else: + return ( + src_modified is None + or dst_modified is None + or src_modified < dst_modified + ) + + elif condition == "exists": + return dst_fs.exists(dst_path) + + elif condition == "not_exists": + return not dst_fs.exists(dst_path) + + else: + raise ValueError(condition + "is not a valid copy condition.") From 6f3993d0a3d652cf96a1b528cc2f4dc9d43da77d Mon Sep 17 00:00:00 2001 From: atollk Date: Sat, 20 Mar 2021 11:31:32 +0100 Subject: [PATCH 2/9] Implemented unit tests for the recently added _copy_if functions in fs.copy. tests/test_copy experienced a general rework in many areas. Before, tests focused on the most basic of test cases and thus missed more complex situations that could (and actually did) cause errors. Thus, some tests for the same unit were merged to create more relevant test cases. --- tests/test_copy.py | 878 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 745 insertions(+), 133 deletions(-) diff --git a/tests/test_copy.py b/tests/test_copy.py index 63e550e9..92d77c33 100644 --- a/tests/test_copy.py +++ b/tests/test_copy.py @@ -12,7 +12,55 @@ from fs import open_fs -class TestCopy(unittest.TestCase): +def _create_sandbox_dir(prefix="pyfilesystem2_sandbox_", home=None): + if home is None: + return tempfile.mkdtemp(prefix=prefix) + else: + sandbox_path = os.path.join(home, prefix) + mkdirp(sandbox_path) + return sandbox_path + + +def _touch(root, filepath): + # create abs filename + abs_filepath = os.path.join(root, filepath) + # create dir + dirname = os.path.dirname(abs_filepath) + mkdirp(dirname) + # touch file + with open(abs_filepath, "a"): + os.utime( + abs_filepath, None + ) # update the mtime in case the file exists, same as touch + + return abs_filepath + + +def _write_file(filepath, write_chars=1024): + with open(filepath, "w") as f: + f.write("1" * write_chars) + return filepath + + +def _delay_file_utime(filepath, delta_sec): + utcnow = datetime.datetime.utcnow() + unix_timestamp = calendar.timegm(utcnow.timetuple()) + times = unix_timestamp + delta_sec, unix_timestamp + delta_sec + os.utime(filepath, times) + + +def mkdirp(path): + # os.makedirs(path, exist_ok=True) only for python3.? + try: + os.makedirs(path) + except OSError as exc: + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + +class TestCopySimple(unittest.TestCase): def test_copy_fs(self): for workers in (0, 1, 2, 4): src_fs = open_fs("mem://") @@ -78,50 +126,11 @@ def on_copy(*args): fs.copy.copy_dir(src_fs, "/", dst_fs, "/", on_copy=on_copy) self.assertEqual(on_copy_calls, [(src_fs, "/baz.txt", dst_fs, "/baz.txt")]) - def mkdirp(self, path): - # os.makedirs(path, exist_ok=True) only for python3.? - try: - os.makedirs(path) - except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(path): - pass - else: - raise - - def _create_sandbox_dir(self, prefix="pyfilesystem2_sandbox_", home=None): - if home is None: - return tempfile.mkdtemp(prefix=prefix) - else: - sandbox_path = os.path.join(home, prefix) - self.mkdirp(sandbox_path) - return sandbox_path - - def _touch(self, root, filepath): - # create abs filename - abs_filepath = os.path.join(root, filepath) - # create dir - dirname = os.path.dirname(abs_filepath) - self.mkdirp(dirname) - # touch file - with open(abs_filepath, "a"): - os.utime( - abs_filepath, None - ) # update the mtime in case the file exists, same as touch - - return abs_filepath - - def _write_file(self, filepath, write_chars=1024): - with open(filepath, "w") as f: - f.write("1" * write_chars) - return filepath - - def _delay_file_utime(self, filepath, delta_sec): - utcnow = datetime.datetime.utcnow() - unix_timestamp = calendar.timegm(utcnow.timetuple()) - times = unix_timestamp + delta_sec, unix_timestamp + delta_sec - os.utime(filepath, times) - - def test_copy_file_if_newer_same_fs(self): + +class TestCopyIfNewer(unittest.TestCase): + copy_if_condition = "newer" + + def test_copy_file_if_same_fs(self): src_fs = open_fs("mem://") src_fs.makedir("foo2").touch("exists") src_fs.makedir("foo1").touch("test1.txt") @@ -129,35 +138,42 @@ def test_copy_file_if_newer_same_fs(self): "foo2/exists", datetime.datetime.utcnow() + datetime.timedelta(hours=1) ) self.assertTrue( - fs.copy.copy_file_if_newer( - src_fs, "foo1/test1.txt", src_fs, "foo2/test1.txt.copy" + fs.copy.copy_file_if( + src_fs, + "foo1/test1.txt", + src_fs, + "foo2/test1.txt.copy", + self.copy_if_condition, ) ) self.assertFalse( - fs.copy.copy_file_if_newer(src_fs, "foo1/test1.txt", src_fs, "foo2/exists") + fs.copy.copy_file_if( + src_fs, "foo1/test1.txt", src_fs, "foo2/exists", self.copy_if_condition + ) ) self.assertTrue(src_fs.exists("foo2/test1.txt.copy")) - def test_copy_file_if_newer_dst_older(self): + def test_copy_file_if_dst_is_older(self): try: # create first dst ==> dst is older the src ==> file should be copied - dst_dir = self._create_sandbox_dir() - dst_file1 = self._touch(dst_dir, "file1.txt") - self._write_file(dst_file1) + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + _write_file(dst_file1) + + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) - src_dir = self._create_sandbox_dir() - src_file1 = self._touch(src_dir, "file1.txt") - self._write_file(src_file1) # ensure src file is newer than dst, changing its modification time - self._delay_file_utime(src_file1, delta_sec=60) + _delay_file_utime(src_file1, delta_sec=60) src_fs = open_fs("osfs://" + src_dir) dst_fs = open_fs("osfs://" + dst_dir) self.assertTrue(dst_fs.exists("/file1.txt")) - copied = fs.copy.copy_file_if_newer( - src_fs, "/file1.txt", dst_fs, "/file1.txt" + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition ) self.assertTrue(copied) @@ -166,19 +182,19 @@ def test_copy_file_if_newer_dst_older(self): shutil.rmtree(src_dir) shutil.rmtree(dst_dir) - def test_copy_file_if_newer_dst_doesnt_exists(self): + def test_copy_file_if_dst_doesnt_exists(self): try: - src_dir = self._create_sandbox_dir() - src_file1 = self._touch(src_dir, "file1.txt") - self._write_file(src_file1) + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) - dst_dir = self._create_sandbox_dir() + dst_dir = _create_sandbox_dir() src_fs = open_fs("osfs://" + src_dir) dst_fs = open_fs("osfs://" + dst_dir) - copied = fs.copy.copy_file_if_newer( - src_fs, "/file1.txt", dst_fs, "/file1.txt" + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition ) self.assertTrue(copied) @@ -187,57 +203,320 @@ def test_copy_file_if_newer_dst_doesnt_exists(self): shutil.rmtree(src_dir) shutil.rmtree(dst_dir) - def test_copy_file_if_newer_dst_is_newer(self): + def test_copy_file_if_dst_is_newer(self): try: - src_dir = self._create_sandbox_dir() - src_file1 = self._touch(src_dir, "file1.txt") - self._write_file(src_file1) + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) + + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + _write_file(dst_file1) + + # ensure dst file is newer than src, changing its modification time + _delay_file_utime(dst_file1, delta_sec=60) + + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + self.assertTrue(dst_fs.exists("/file1.txt")) + + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition + ) + + self.assertFalse(copied) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) - dst_dir = self._create_sandbox_dir() - dst_file1 = self._touch(dst_dir, "file1.txt") - self._write_file(dst_file1) + def test_copy_fs_if(self): + try: + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + dst_file2 = _touch(dst_dir, "file2.txt") + _write_file(dst_file1) + _write_file(dst_file2) + + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + src_file2 = _touch(src_dir, "file2.txt") + src_file3 = _touch(src_dir, "file3.txt") + _write_file(src_file1) + _write_file(src_file2) + _write_file(src_file3) + + # ensure src_file1 is newer than dst_file1, changing its modification time + # ensure dst_file2 is newer than src_file2, changing its modification time + _delay_file_utime(src_file1, delta_sec=60) + _delay_file_utime(dst_file2, delta_sec=60) src_fs = open_fs("osfs://" + src_dir) dst_fs = open_fs("osfs://" + dst_dir) self.assertTrue(dst_fs.exists("/file1.txt")) + self.assertTrue(dst_fs.exists("/file2.txt")) + + copied = [] + + def on_copy(src_fs, src_path, dst_fs, dst_path): + copied.append(dst_path) + + fs.copy.copy_fs_if( + src_fs, dst_fs, on_copy=on_copy, condition=self.copy_if_condition + ) + + self.assertTrue("/file1.txt" in copied) + self.assertTrue("/file2.txt" not in copied) + self.assertTrue("/file3.txt" in copied) + self.assertTrue(dst_fs.exists("/file1.txt")) + self.assertTrue(dst_fs.exists("/file2.txt")) + self.assertTrue(dst_fs.exists("/file3.txt")) + + src_fs.close() + dst_fs.close() + + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_dir_if(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) + + src_file2 = _touch(src_dir, os.path.join("one_level_down", "file2.txt")) + _write_file(src_file2) + + dst_dir = _create_sandbox_dir() + mkdirp(os.path.join(dst_dir, "target_dir")) + dst_file1 = _touch(dst_dir, os.path.join("target_dir", "file1.txt")) + _write_file(dst_file1) + + # ensure dst file is newer than src, changing its modification time + _delay_file_utime(dst_file1, delta_sec=60) - copied = fs.copy.copy_file_if_newer( - src_fs, "/file1.txt", dst_fs, "/file1.txt" + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + copied = [] + + def on_copy(src_fs, src_path, dst_fs, dst_path): + copied.append(dst_path) + + fs.copy.copy_dir_if( + src_fs, + "/", + dst_fs, + "/target_dir/", + on_copy=on_copy, + condition=self.copy_if_condition, ) - self.assertEqual(copied, False) + self.assertEqual(copied, ["/target_dir/one_level_down/file2.txt"]) + self.assertTrue(dst_fs.exists("/target_dir/one_level_down/file2.txt")) + + src_fs.close() + dst_fs.close() finally: shutil.rmtree(src_dir) shutil.rmtree(dst_dir) - def test_copy_fs_if_newer_dst_older(self): + def test_copy_dir_if_same_fs(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "src" + os.sep + "file1.txt") + _write_file(src_file1) + + _create_sandbox_dir(home=src_dir) + + src_fs = open_fs("osfs://" + src_dir) + + copied = [] + + def on_copy(src_fs, src_path, dst_fs, dst_path): + copied.append(dst_path) + + fs.copy.copy_dir_if( + src_fs, "/src", src_fs, "/dst", on_copy=on_copy, condition="newer" + ) + + self.assertEqual(copied, ["/dst/file1.txt"]) + self.assertTrue(src_fs.exists("/dst/file1.txt")) + + src_fs.close() + + finally: + shutil.rmtree(src_dir) + + def test_copy_dir_if_multiple_files(self): + try: + src_dir = _create_sandbox_dir() + src_fs = open_fs("osfs://" + src_dir) + src_fs.makedirs("foo/bar") + src_fs.makedirs("foo/empty") + src_fs.touch("test.txt") + src_fs.touch("foo/bar/baz.txt") + + dst_dir = _create_sandbox_dir() + dst_fs = open_fs("osfs://" + dst_dir) + + fs.copy.copy_dir_if(src_fs, "/foo", dst_fs, "/", condition="newer") + + self.assertTrue(dst_fs.isdir("bar")) + self.assertTrue(dst_fs.isdir("empty")) + self.assertTrue(dst_fs.isfile("bar/baz.txt")) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + +class TestCopyIfOlder(unittest.TestCase): + copy_if_condition = "older" + + def test_copy_file_if_same_fs(self): + src_fs = open_fs("mem://") + src_fs.makedir("foo2").touch("exists") + src_fs.makedir("foo1").touch("test1.txt") + src_fs.settimes( + "foo2/exists", datetime.datetime.utcnow() - datetime.timedelta(hours=1) + ) + self.assertTrue( + fs.copy.copy_file_if( + src_fs, + "foo1/test1.txt", + src_fs, + "foo2/test1.txt.copy", + self.copy_if_condition, + ) + ) + self.assertFalse( + fs.copy.copy_file_if( + src_fs, "foo1/test1.txt", src_fs, "foo2/exists", self.copy_if_condition + ) + ) + self.assertTrue(src_fs.exists("foo2/test1.txt.copy")) + + def test_copy_file_if_dst_is_older(self): try: # create first dst ==> dst is older the src ==> file should be copied - dst_dir = self._create_sandbox_dir() - dst_file1 = self._touch(dst_dir, "file1.txt") - self._write_file(dst_file1) + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + _write_file(dst_file1) + + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) - src_dir = self._create_sandbox_dir() - src_file1 = self._touch(src_dir, "file1.txt") - self._write_file(src_file1) # ensure src file is newer than dst, changing its modification time - self._delay_file_utime(src_file1, delta_sec=60) + _delay_file_utime(src_file1, delta_sec=60) + + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + self.assertTrue(dst_fs.exists("/file1.txt")) + + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition + ) + + self.assertFalse(copied) + self.assertTrue(dst_fs.exists("/file1.txt")) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_file_if_dst_doesnt_exists(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) + + dst_dir = _create_sandbox_dir() src_fs = open_fs("osfs://" + src_dir) dst_fs = open_fs("osfs://" + dst_dir) + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition + ) + + self.assertTrue(copied) self.assertTrue(dst_fs.exists("/file1.txt")) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_file_if_dst_is_newer(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) + + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + _write_file(dst_file1) + + # ensure dst file is newer than src, changing its modification time + _delay_file_utime(dst_file1, delta_sec=60) + + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + self.assertTrue(dst_fs.exists("/file1.txt")) + + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition + ) + + self.assertTrue(copied) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_fs_if(self): + try: + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + dst_file2 = _touch(dst_dir, "file2.txt") + _write_file(dst_file1) + _write_file(dst_file2) + + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + src_file2 = _touch(src_dir, "file2.txt") + src_file3 = _touch(src_dir, "file3.txt") + _write_file(src_file1) + _write_file(src_file2) + _write_file(src_file3) + + # ensure src_file1 is newer than dst_file1, changing its modification time + # ensure dst_file2 is newer than src_file2, changing its modification time + _delay_file_utime(src_file1, delta_sec=60) + _delay_file_utime(dst_file2, delta_sec=60) + + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + self.assertTrue(dst_fs.exists("/file1.txt")) + self.assertTrue(dst_fs.exists("/file2.txt")) copied = [] def on_copy(src_fs, src_path, dst_fs, dst_path): copied.append(dst_path) - fs.copy.copy_fs_if_newer(src_fs, dst_fs, on_copy=on_copy) + fs.copy.copy_fs_if( + src_fs, dst_fs, on_copy=on_copy, condition=self.copy_if_condition + ) - self.assertEqual(copied, ["/file1.txt"]) + self.assertTrue("/file1.txt" not in copied) + self.assertTrue("/file2.txt" in copied) + self.assertTrue("/file3.txt" in copied) self.assertTrue(dst_fs.exists("/file1.txt")) + self.assertTrue(dst_fs.exists("/file2.txt")) + self.assertTrue(dst_fs.exists("/file3.txt")) src_fs.close() dst_fs.close() @@ -246,16 +525,22 @@ def on_copy(src_fs, src_path, dst_fs, dst_path): shutil.rmtree(src_dir) shutil.rmtree(dst_dir) - def test_copy_fs_if_newer_when_dst_doesnt_exists(self): + def test_copy_dir_if(self): try: - src_dir = self._create_sandbox_dir() - src_file1 = self._touch(src_dir, "file1.txt") - self._write_file(src_file1) + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) + + src_file2 = _touch(src_dir, os.path.join("one_level_down", "file2.txt")) + _write_file(src_file2) - src_file2 = self._touch(src_dir, "one_level_down" + os.sep + "file2.txt") - self._write_file(src_file2) + dst_dir = _create_sandbox_dir() + mkdirp(os.path.join(dst_dir, "target_dir")) + dst_file1 = _touch(dst_dir, os.path.join("target_dir", "file1.txt")) + _write_file(dst_file1) - dst_dir = self._create_sandbox_dir() + # ensure src file is newer than dst, changing its modification time + _delay_file_utime(src_file1, delta_sec=60) src_fs = open_fs("osfs://" + src_dir) dst_fs = open_fs("osfs://" + dst_dir) @@ -265,31 +550,152 @@ def test_copy_fs_if_newer_when_dst_doesnt_exists(self): def on_copy(src_fs, src_path, dst_fs, dst_path): copied.append(dst_path) - fs.copy.copy_fs_if_newer(src_fs, dst_fs, on_copy=on_copy) + fs.copy.copy_dir_if( + src_fs, + "/", + dst_fs, + "/target_dir/", + on_copy=on_copy, + condition=self.copy_if_condition, + ) - self.assertEqual(copied, ["/file1.txt", "/one_level_down/file2.txt"]) - self.assertTrue(dst_fs.exists("/file1.txt")) - self.assertTrue(dst_fs.exists("/one_level_down/file2.txt")) + self.assertEqual(copied, ["/target_dir/one_level_down/file2.txt"]) + self.assertTrue(dst_fs.exists("/target_dir/one_level_down/file2.txt")) src_fs.close() dst_fs.close() + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_dir_if_same_fs(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "src" + os.sep + "file1.txt") + _write_file(src_file1) + + _create_sandbox_dir(home=src_dir) + + src_fs = open_fs("osfs://" + src_dir) + + copied = [] + + def on_copy(src_fs, src_path, dst_fs, dst_path): + copied.append(dst_path) + + fs.copy.copy_dir_if( + src_fs, "/src", src_fs, "/dst", on_copy=on_copy, condition="newer" + ) + + self.assertEqual(copied, ["/dst/file1.txt"]) + self.assertTrue(src_fs.exists("/dst/file1.txt")) + + src_fs.close() + + finally: + shutil.rmtree(src_dir) + + def test_copy_dir_if_multiple_files(self): + try: + src_dir = _create_sandbox_dir() + src_fs = open_fs("osfs://" + src_dir) + src_fs.makedirs("foo/bar") + src_fs.makedirs("foo/empty") + src_fs.touch("test.txt") + src_fs.touch("foo/bar/baz.txt") + + dst_dir = _create_sandbox_dir() + dst_fs = open_fs("osfs://" + dst_dir) + + fs.copy.copy_dir_if(src_fs, "/foo", dst_fs, "/", condition="newer") + + self.assertTrue(dst_fs.isdir("bar")) + self.assertTrue(dst_fs.isdir("empty")) + self.assertTrue(dst_fs.isfile("bar/baz.txt")) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + +class TestCopyIfExists(unittest.TestCase): + copy_if_condition = "exists" + + def test_copy_file_if_same_fs(self): + src_fs = open_fs("mem://") + src_fs.makedir("foo2").touch("exists") + src_fs.makedir("foo1").touch("test1.txt") + self.assertFalse( + fs.copy.copy_file_if( + src_fs, + "foo1/test1.txt", + src_fs, + "foo2/test1.txt.copy", + self.copy_if_condition, + ) + ) + self.assertTrue( + fs.copy.copy_file_if( + src_fs, "foo1/test1.txt", src_fs, "foo2/exists", self.copy_if_condition + ) + ) + self.assertFalse(src_fs.exists("foo2/test1.txt.copy")) + + def test_copy_file_if_dst_doesnt_exists(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) + dst_dir = _create_sandbox_dir() + + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition + ) + + self.assertFalse(copied) + self.assertFalse(dst_fs.exists("/file1.txt")) finally: shutil.rmtree(src_dir) shutil.rmtree(dst_dir) - def test_copy_fs_if_newer_dont_copy_when_dst_exists(self): + def test_copy_file_if_dst_exists(self): try: - # src is older than dst => no copy should be necessary - src_dir = self._create_sandbox_dir() - src_file1 = self._touch(src_dir, "file1.txt") - self._write_file(src_file1) - - dst_dir = self._create_sandbox_dir() - dst_file1 = self._touch(dst_dir, "file1.txt") - self._write_file(dst_file1) - # ensure dst file is newer than src, changing its modification time - self._delay_file_utime(dst_file1, delta_sec=60) + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) + + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + _write_file(dst_file1) + + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + self.assertTrue(dst_fs.exists("/file1.txt")) + + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition + ) + + self.assertTrue(copied) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_fs_if(self): + try: + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + _write_file(dst_file1) + + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + src_file2 = _touch(src_dir, "file2.txt") + _write_file(src_file1) + _write_file(src_file2) src_fs = open_fs("osfs://" + src_dir) dst_fs = open_fs("osfs://" + dst_dir) @@ -301,10 +707,13 @@ def test_copy_fs_if_newer_dont_copy_when_dst_exists(self): def on_copy(src_fs, src_path, dst_fs, dst_path): copied.append(dst_path) - fs.copy.copy_fs_if_newer(src_fs, dst_fs, on_copy=on_copy) + fs.copy.copy_fs_if( + src_fs, dst_fs, on_copy=on_copy, condition=self.copy_if_condition + ) - self.assertEqual(copied, []) + self.assertEqual(copied, ["/file1.txt"]) self.assertTrue(dst_fs.exists("/file1.txt")) + self.assertFalse(dst_fs.exists("/file2.txt")) src_fs.close() dst_fs.close() @@ -313,48 +722,249 @@ def on_copy(src_fs, src_path, dst_fs, dst_path): shutil.rmtree(src_dir) shutil.rmtree(dst_dir) - def test_copy_dir_if_newer_one_dst_doesnt_exist(self): + def test_copy_dir_if(self): try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) - src_dir = self._create_sandbox_dir() - src_file1 = self._touch(src_dir, "file1.txt") - self._write_file(src_file1) + src_file2 = _touch(src_dir, os.path.join("one_level_down", "file2.txt")) + _write_file(src_file2) - src_file2 = self._touch(src_dir, "one_level_down" + os.sep + "file2.txt") - self._write_file(src_file2) + dst_dir = _create_sandbox_dir() + mkdirp(os.path.join(dst_dir, "target_dir")) + dst_file1 = _touch(dst_dir, os.path.join("target_dir", "file1.txt")) + _write_file(dst_file1) - dst_dir = self._create_sandbox_dir() - dst_file1 = self._touch(dst_dir, "file1.txt") - self._write_file(dst_file1) - # ensure dst file is newer than src, changing its modification time - self._delay_file_utime(dst_file1, delta_sec=60) + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + copied = [] + + def on_copy(src_fs, src_path, dst_fs, dst_path): + copied.append(dst_path) + + fs.copy.copy_dir_if( + src_fs, + "/", + dst_fs, + "/target_dir/", + on_copy=on_copy, + condition=self.copy_if_condition, + ) + + self.assertEqual(copied, ["/target_dir/file1.txt"]) + self.assertFalse(dst_fs.exists("/target_dir/one_level_down/file2.txt")) + + src_fs.close() + dst_fs.close() + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_dir_if_same_fs(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "src" + os.sep + "file1.txt") + _write_file(src_file1) + + _create_sandbox_dir(home=src_dir) + + src_fs = open_fs("osfs://" + src_dir) + + copied = [] + + def on_copy(src_fs, src_path, dst_fs, dst_path): + copied.append(dst_path) + + fs.copy.copy_dir_if( + src_fs, "/src", src_fs, "/dst", on_copy=on_copy, condition="newer" + ) + + self.assertEqual(copied, ["/dst/file1.txt"]) + self.assertTrue(src_fs.exists("/dst/file1.txt")) + + src_fs.close() + + finally: + shutil.rmtree(src_dir) + + def test_copy_dir_if_multiple_files(self): + try: + src_dir = _create_sandbox_dir() + src_fs = open_fs("osfs://" + src_dir) + src_fs.makedirs("foo/bar") + src_fs.makedirs("foo/empty") + src_fs.touch("test.txt") + src_fs.touch("foo/bar/baz.txt") + + dst_dir = _create_sandbox_dir() + dst_fs = open_fs("osfs://" + dst_dir) + + fs.copy.copy_dir_if(src_fs, "/foo", dst_fs, "/", condition="newer") + + self.assertTrue(dst_fs.isdir("bar")) + self.assertTrue(dst_fs.isdir("empty")) + self.assertTrue(dst_fs.isfile("bar/baz.txt")) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + +class TestCopyIfNotExists(unittest.TestCase): + copy_if_condition = "not_exists" + + def test_copy_file_if_same_fs(self): + src_fs = open_fs("mem://") + src_fs.makedir("foo2").touch("exists") + src_fs.makedir("foo1").touch("test1.txt") + self.assertTrue( + fs.copy.copy_file_if( + src_fs, + "foo1/test1.txt", + src_fs, + "foo2/test1.txt.copy", + self.copy_if_condition, + ) + ) + self.assertFalse( + fs.copy.copy_file_if( + src_fs, "foo1/test1.txt", src_fs, "foo2/exists", self.copy_if_condition + ) + ) + self.assertTrue(src_fs.exists("foo2/test1.txt.copy")) + + def test_copy_file_if_dst_doesnt_exists(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) + + dst_dir = _create_sandbox_dir() src_fs = open_fs("osfs://" + src_dir) dst_fs = open_fs("osfs://" + dst_dir) + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition + ) + + self.assertTrue(copied) + self.assertTrue(dst_fs.exists("/file1.txt")) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_file_if_dst_exists(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) + + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + _write_file(dst_file1) + + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + self.assertTrue(dst_fs.exists("/file1.txt")) + + copied = fs.copy.copy_file_if( + src_fs, "/file1.txt", dst_fs, "/file1.txt", self.copy_if_condition + ) + + self.assertFalse(copied) + self.assertTrue(dst_fs.exists("/file1.txt")) + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_fs_if(self): + try: + dst_dir = _create_sandbox_dir() + dst_file1 = _touch(dst_dir, "file1.txt") + _write_file(dst_file1) + + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + src_file2 = _touch(src_dir, "file2.txt") + _write_file(src_file1) + _write_file(src_file2) + + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + self.assertTrue(dst_fs.exists("/file1.txt")) + copied = [] def on_copy(src_fs, src_path, dst_fs, dst_path): copied.append(dst_path) - fs.copy.copy_dir_if_newer(src_fs, "/", dst_fs, "/", on_copy=on_copy) + fs.copy.copy_fs_if( + src_fs, dst_fs, on_copy=on_copy, condition=self.copy_if_condition + ) - self.assertEqual(copied, ["/one_level_down/file2.txt"]) - self.assertTrue(dst_fs.exists("/one_level_down/file2.txt")) + self.assertEqual(copied, ["/file2.txt"]) + self.assertTrue(dst_fs.exists("/file1.txt")) + self.assertTrue(dst_fs.exists("/file2.txt")) src_fs.close() dst_fs.close() + finally: shutil.rmtree(src_dir) shutil.rmtree(dst_dir) - def test_copy_dir_if_newer_same_fs(self): + def test_copy_dir_if(self): try: - src_dir = self._create_sandbox_dir() - src_file1 = self._touch(src_dir, "src" + os.sep + "file1.txt") - self._write_file(src_file1) + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "file1.txt") + _write_file(src_file1) - self._create_sandbox_dir(home=src_dir) + src_file2 = _touch(src_dir, os.path.join("one_level_down", "file2.txt")) + _write_file(src_file2) + + dst_dir = _create_sandbox_dir() + mkdirp(os.path.join(dst_dir, "target_dir")) + dst_file1 = _touch(dst_dir, os.path.join("target_dir", "file1.txt")) + _write_file(dst_file1) + + src_fs = open_fs("osfs://" + src_dir) + dst_fs = open_fs("osfs://" + dst_dir) + + copied = [] + + def on_copy(src_fs, src_path, dst_fs, dst_path): + copied.append(dst_path) + + fs.copy.copy_dir_if( + src_fs, + "/", + dst_fs, + "/target_dir/", + on_copy=on_copy, + condition=self.copy_if_condition, + ) + + self.assertEqual(copied, ["/target_dir/one_level_down/file2.txt"]) + self.assertTrue(dst_fs.exists("/target_dir/file1.txt")) + self.assertTrue(dst_fs.exists("/target_dir/one_level_down/file2.txt")) + + src_fs.close() + dst_fs.close() + finally: + shutil.rmtree(src_dir) + shutil.rmtree(dst_dir) + + def test_copy_dir_if_same_fs(self): + try: + src_dir = _create_sandbox_dir() + src_file1 = _touch(src_dir, "src" + os.sep + "file1.txt") + _write_file(src_file1) + + _create_sandbox_dir(home=src_dir) src_fs = open_fs("osfs://" + src_dir) @@ -363,7 +973,9 @@ def test_copy_dir_if_newer_same_fs(self): def on_copy(src_fs, src_path, dst_fs, dst_path): copied.append(dst_path) - fs.copy.copy_dir_if_newer(src_fs, "/src", src_fs, "/dst", on_copy=on_copy) + fs.copy.copy_dir_if( + src_fs, "/src", src_fs, "/dst", on_copy=on_copy, condition="newer" + ) self.assertEqual(copied, ["/dst/file1.txt"]) self.assertTrue(src_fs.exists("/dst/file1.txt")) @@ -373,19 +985,19 @@ def on_copy(src_fs, src_path, dst_fs, dst_path): finally: shutil.rmtree(src_dir) - def test_copy_dir_if_newer_multiple_files(self): + def test_copy_dir_if_multiple_files(self): try: - src_dir = self._create_sandbox_dir() + src_dir = _create_sandbox_dir() src_fs = open_fs("osfs://" + src_dir) src_fs.makedirs("foo/bar") src_fs.makedirs("foo/empty") src_fs.touch("test.txt") src_fs.touch("foo/bar/baz.txt") - dst_dir = self._create_sandbox_dir() + dst_dir = _create_sandbox_dir() dst_fs = open_fs("osfs://" + dst_dir) - fs.copy.copy_dir_if_newer(src_fs, "/foo", dst_fs, "/") + fs.copy.copy_dir_if(src_fs, "/foo", dst_fs, "/", condition="newer") self.assertTrue(dst_fs.isdir("bar")) self.assertTrue(dst_fs.isdir("empty")) From d3fb588558ff0e9c16e6245a87d6872b244c12b9 Mon Sep 17 00:00:00 2001 From: atollk Date: Sat, 20 Mar 2021 11:35:26 +0100 Subject: [PATCH 3/9] Updated CHANGELOG.md. --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31cb320c..e7b0419b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Added FTP over TLS (FTPS) support to FTPFS. Closes [#437](https://github.com/PyFilesystem/pyfilesystem2/issues/437), [#449](https://github.com/PyFilesystem/pyfilesystem2/pull/449). +- Added `fs.copy.copy_file_if`, `fs.copy.copy_dir_if`, and `fs.copy.copy_fs_if`. + Closes [#458](https://github.com/PyFilesystem/pyfilesystem2/issues/458). ### Changed @@ -31,6 +33,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Make `FTPFile`, `MemoryFile` and `RawWrapper` accept [`array.array`](https://docs.python.org/3/library/array.html) arguments for the `write` and `writelines` methods, as expected by their base class [`io.RawIOBase`](https://docs.python.org/3/library/io.html#io.RawIOBase). - Various documentation issues, including `MemoryFS` docstring not rendering properly. +- Fixed performance bugs in `fs.copy.copy_dir_if_newer`. Test cases were adapted to catch those bugs in the future. ## [2.4.12] - 2021-01-14 From 3a7de8fcd6a947d57292d5903084223c5fc1829a Mon Sep 17 00:00:00 2001 From: atollk Date: Mon, 22 Mar 2021 18:06:30 +0100 Subject: [PATCH 4/9] Marked fs.copy.copy_*_if_newer as deprecated. --- fs/copy.py | 67 ++++++------------------------------------------------ 1 file changed, 7 insertions(+), 60 deletions(-) diff --git a/fs/copy.py b/fs/copy.py index 3f949cc3..e2e82790 100644 --- a/fs/copy.py +++ b/fs/copy.py @@ -4,6 +4,7 @@ from __future__ import print_function, unicode_literals import typing +import warnings from .errors import ResourceNotFound from .opener import manage_fs @@ -52,26 +53,8 @@ def copy_fs_if_newer( workers=0, # type: int ): # type: (...) -> None - """Copy the contents of one filesystem to another, checking times. - - If both source and destination files exist, the copy is executed - only if the source file is newer than the destination file. In case - modification times of source or destination files are not available, - copy file is always executed. - - Arguments: - src_fs (FS or str): Source filesystem (URL or instance). - dst_fs (FS or str): Destination filesystem (URL or instance). - walker (~fs.walk.Walker, optional): A walker object that will be - used to scan for files in ``src_fs``. Set this if you only want - to consider a sub-set of the resources in ``src_fs``. - on_copy (callable):A function callback called after a single file copy - is executed. Expected signature is ``(src_fs, src_path, dst_fs, - dst_path)``. - workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for - a single-threaded copy. - - """ + """Deprecated. Use ``copy_fs_if``.""" + warnings.warn(DeprecationWarning("copy_fs_if_newer is deprecated. Use copy_fs_if instead.")) return copy_fs_if(src_fs, dst_fs, "newer", walker, on_copy, workers) @@ -157,24 +140,8 @@ def copy_file_if_newer( dst_path, # type: Text ): # type: (...) -> bool - """Copy a file from one filesystem to another, checking times. - - If the destination exists, and is a file, it will be first truncated. - If both source and destination files exist, the copy is executed only - if the source file is newer than the destination file. In case - modification times of source or destination files are not available, - copy is always executed. - - Arguments: - src_fs (FS or str): Source filesystem (instance or URL). - src_path (str): Path to a file on the source filesystem. - dst_fs (FS or str): Destination filesystem (instance or URL). - dst_path (str): Path to a file on the destination filesystem. - - Returns: - bool: `True` if the file copy was executed, `False` otherwise. - - """ + """Deprecated. Use ``copy_file_if``.""" + warnings.warn(DeprecationWarning("copy_file_if_newer is deprecated. Use copy_file_if instead.")) return copy_file_if(src_fs, src_path, dst_fs, dst_path, "newer") @@ -342,28 +309,8 @@ def copy_dir_if_newer( workers=0, # type: int ): # type: (...) -> None - """Copy a directory from one filesystem to another, checking times. - - If both source and destination files exist, the copy is executed only - if the source file is newer than the destination file. In case - modification times of source or destination files are not available, - copy is always executed. - - Arguments: - src_fs (FS or str): Source filesystem (instance or URL). - src_path (str): Path to a directory on the source filesystem. - dst_fs (FS or str): Destination filesystem (instance or URL). - dst_path (str): Path to a directory on the destination filesystem. - walker (~fs.walk.Walker, optional): A walker object that will be - used to scan for files in ``src_fs``. Set this if you only - want to consider a sub-set of the resources in ``src_fs``. - on_copy (callable, optional): A function callback called after - a single file copy is executed. Expected signature is - ``(src_fs, src_path, dst_fs, dst_path)``. - workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for - a single-threaded copy. - - """ + """Deprecated. Use ``copy_dir_if``.""" + warnings.warn(DeprecationWarning("copy_dir_if_newer is deprecated. Use copy_dir_if instead.")) copy_dir_if(src_fs, src_path, dst_fs, dst_path, "newer", walker, on_copy, workers) From 8cc977ef02c8fc26b1276c9b4ce0b0fedd9f5a99 Mon Sep 17 00:00:00 2001 From: atollk Date: Mon, 22 Mar 2021 18:14:40 +0100 Subject: [PATCH 5/9] Applied changes suggested by code review. --- fs/copy.py | 139 ++++++++++++++++++++++++++++------------------------- 1 file changed, 73 insertions(+), 66 deletions(-) diff --git a/fs/copy.py b/fs/copy.py index e2e82790..55172afd 100644 --- a/fs/copy.py +++ b/fs/copy.py @@ -53,8 +53,10 @@ def copy_fs_if_newer( workers=0, # type: int ): # type: (...) -> None - """Deprecated. Use ``copy_fs_if``.""" - warnings.warn(DeprecationWarning("copy_fs_if_newer is deprecated. Use copy_fs_if instead.")) + """Use ``copy_fs_if`` instead.""" + warnings.warn( + DeprecationWarning("copy_fs_if_newer is deprecated. Use copy_fs_if instead.") + ) return copy_fs_if(src_fs, dst_fs, "newer", walker, on_copy, workers) @@ -70,21 +72,22 @@ def copy_fs_if( """Copy the contents of one filesystem to another, depending on a condition. Depending on the value of ``strategy``, certain conditions must be fulfilled - for a file to be copied to ``dst_fs``. - - If ``condition`` has the value ``"newer"``, the last modification time - of the source file must be newer than that of the destination file. - If either file has no modification time, the copy is performed always. - - If ``condition`` has the value ``"older"``, the last modification time - of the source file must be older than that of the destination file. - If either file has no modification time, the copy is performed always. - - If ``condition`` has the value ``"exists"``, the source file is only - copied if a file of the same path already exists in ``dst_fs``. + for a file to be copied to ``dst_fs``. The following values + are supported: + + ``"always"`` + The source file is always copied. + ``"newer"`` + The last modification time of the source file must be newer than that of the destination file. + If either file has no modification time, the copy is performed always. + ``"older"`` + The last modification time of the source file must be older than that of the destination file. + If either file has no modification time, the copy is performed always. + ``"exists"`` + The source file is only copied if a file of the same path already exists in ``dst_fs``. + ``"not_exists"`` + The source file is only copied if no file of the same path already exists in ``dst_fs``. - If ``condition`` has the value ``"not_exists"``, the source file is only - copied if no file of the same path already exists in ``dst_fs``. Arguments: src_fs (FS or str): Source filesystem (URL or instance). @@ -140,8 +143,12 @@ def copy_file_if_newer( dst_path, # type: Text ): # type: (...) -> bool - """Deprecated. Use ``copy_file_if``.""" - warnings.warn(DeprecationWarning("copy_file_if_newer is deprecated. Use copy_file_if instead.")) + """Use ``copy_file_if`` instead.""" + warnings.warn( + DeprecationWarning( + "copy_file_if_newer is deprecated. Use copy_file_if instead." + ) + ) return copy_file_if(src_fs, src_path, dst_fs, dst_path, "newer") @@ -156,21 +163,22 @@ def copy_file_if( """Copy a file from one filesystem to another, depending on a condition. Depending on the value of ``strategy``, certain conditions must be fulfilled - for a file to be copied to ``dst_fs``. - - If ``condition`` has the value ``"newer"``, the last modification time - of the source file must be newer than that of the destination file. - If either file has no modification time, the copy is performed always. - - If ``condition`` has the value ``"older"``, the last modification time - of the source file must be older than that of the destination file. - If either file has no modification time, the copy is performed always. + for a file to be copied to ``dst_fs``. The following values + are supported: + + ``"always"`` + The source file is always copied. + ``"newer"`` + The last modification time of the source file must be newer than that of the destination file. + If either file has no modification time, the copy is performed always. + ``"older"`` + The last modification time of the source file must be older than that of the destination file. + If either file has no modification time, the copy is performed always. + ``"exists"`` + The source file is only copied if a file of the same path already exists in ``dst_fs``. + ``"not_exists"`` + The source file is only copied if no file of the same path already exists in ``dst_fs``. - If ``condition`` has the value ``"exists"``, the source file is only - copied if a file of the same path already exists in ``dst_fs``. - - If ``condition`` has the value ``"not_exists"``, the source file is only - copied if no file of the same path already exists in ``dst_fs``. Arguments: src_fs (FS or str): Source filesystem (instance or URL). @@ -309,8 +317,10 @@ def copy_dir_if_newer( workers=0, # type: int ): # type: (...) -> None - """Deprecated. Use ``copy_dir_if``.""" - warnings.warn(DeprecationWarning("copy_dir_if_newer is deprecated. Use copy_dir_if instead.")) + """Use ``copy_dir_if`` instead.""" + warnings.warn( + DeprecationWarning("copy_dir_if_newer is deprecated. Use copy_dir_if instead.") + ) copy_dir_if(src_fs, src_path, dst_fs, dst_path, "newer", walker, on_copy, workers) @@ -328,39 +338,36 @@ def copy_dir_if( """Copy a directory from one filesystem to another, depending on a condition. Depending on the value of ``strategy``, certain conditions must be - fulfilled for a file to be copied to ``dst_fs``. - - If ``condition`` has the value ``"always"``, the source file is always - copied. - - If ``condition`` has the value ``"newer"``, the last modification time - of the source file must be newer than that of the destination file. - If either file has no modification time, the copy is performed always. - - If ``condition`` has the value ``"older"``, the last modification time - of the source file must be older than that of the destination file. - If either file has no modification time, the copy is performed always. - - If ``condition`` has the value ``"exists"``, the source file is only - copied if a file of the same path already exists in ``dst_fs``. - - If ``condition`` has the value ``"not_exists"``, the source file is only - copied if no file of the same path already exists in ``dst_fs``. + fulfilled for a file to be copied to ``dst_fs``. The following values + are supported: + + ``"always"`` + The source file is always copied. + ``"newer"`` + The last modification time of the source file must be newer than that of the destination file. + If either file has no modification time, the copy is performed always. + ``"older"`` + The last modification time of the source file must be older than that of the destination file. + If either file has no modification time, the copy is performed always. + ``"exists"`` + The source file is only copied if a file of the same path already exists in ``dst_fs``. + ``"not_exists"`` + The source file is only copied if no file of the same path already exists in ``dst_fs``. Arguments: - src_fs (FS or str): Source filesystem (instance or URL). - src_path (str): Path to a directory on the source filesystem. - dst_fs (FS or str): Destination filesystem (instance or URL). - dst_path (str): Path to a directory on the destination filesystem. - condition (str): Name of the condition to check for each file. - walker (~fs.walk.Walker, optional): A walker object that will be - used to scan for files in ``src_fs``. Set this if you only want - to consider a sub-set of the resources in ``src_fs``. - on_copy (callable):A function callback called after a single file copy - is executed. Expected signature is ``(src_fs, src_path, dst_fs, - dst_path)``. - workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for - a single-threaded copy. + src_fs (FS or str): Source filesystem (instance or URL). + src_path (str): Path to a directory on the source filesystem. + dst_fs (FS or str): Destination filesystem (instance or URL). + dst_path (str): Path to a directory on the destination filesystem. + condition (str): Name of the condition to check for each file. + walker (~fs.walk.Walker, optional): A walker object that will be + used to scan for files in ``src_fs``. Set this if you only want + to consider a sub-set of the resources in ``src_fs``. + on_copy (callable):A function callback called after a single file copy + is executed. Expected signature is ``(src_fs, src_path, dst_fs, + dst_path)``. + workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for + a single-threaded copy. """ on_copy = on_copy or (lambda *args: None) @@ -434,4 +441,4 @@ def _copy_is_necessary( return not dst_fs.exists(dst_path) else: - raise ValueError(condition + "is not a valid copy condition.") + raise ValueError("{} is not a valid copy condition.".format(condition)) From 766da0141b3f5d7ca8aafb8c0285c26f461d6cd0 Mon Sep 17 00:00:00 2001 From: atollk Date: Mon, 22 Mar 2021 23:28:29 +0100 Subject: [PATCH 6/9] Fixed codestyle --- fs/copy.py | 52 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/fs/copy.py b/fs/copy.py index 55172afd..815e8b36 100644 --- a/fs/copy.py +++ b/fs/copy.py @@ -78,15 +78,19 @@ def copy_fs_if( ``"always"`` The source file is always copied. ``"newer"`` - The last modification time of the source file must be newer than that of the destination file. - If either file has no modification time, the copy is performed always. + The last modification time of the source file must be newer than that + of the destination file. If either file has no modification time, the + copy is performed always. ``"older"`` - The last modification time of the source file must be older than that of the destination file. - If either file has no modification time, the copy is performed always. + The last modification time of the source file must be older than that + of the destination file. If either file has no modification time, the + copy is performed always. ``"exists"`` - The source file is only copied if a file of the same path already exists in ``dst_fs``. + The source file is only copied if a file of the same path already + exists in ``dst_fs``. ``"not_exists"`` - The source file is only copied if no file of the same path already exists in ``dst_fs``. + The source file is only copied if no file of the same path already + exists in ``dst_fs``. Arguments: @@ -99,8 +103,8 @@ def copy_fs_if( on_copy (callable):A function callback called after a single file copy is executed. Expected signature is ``(src_fs, src_path, dst_fs, dst_path)``. - workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for - a single-threaded copy. + workers (int): Use ``worker`` threads to copy data, or ``0`` (default) + for a single-threaded copy. """ return copy_dir_if( @@ -169,15 +173,19 @@ def copy_file_if( ``"always"`` The source file is always copied. ``"newer"`` - The last modification time of the source file must be newer than that of the destination file. - If either file has no modification time, the copy is performed always. + The last modification time of the source file must be newer than that + of the destination file. If either file has no modification time, the + copy is performed always. ``"older"`` - The last modification time of the source file must be older than that of the destination file. - If either file has no modification time, the copy is performed always. + The last modification time of the source file must be older than that + of the destination file. If either file has no modification time, the + copy is performed always. ``"exists"`` - The source file is only copied if a file of the same path already exists in ``dst_fs``. + The source file is only copied if a file of the same path already + exists in ``dst_fs``. ``"not_exists"`` - The source file is only copied if no file of the same path already exists in ``dst_fs``. + The source file is only copied if no file of the same path already + exists in ``dst_fs``. Arguments: @@ -344,15 +352,19 @@ def copy_dir_if( ``"always"`` The source file is always copied. ``"newer"`` - The last modification time of the source file must be newer than that of the destination file. - If either file has no modification time, the copy is performed always. + The last modification time of the source file must be newer than that + of the destination file. If either file has no modification time, the + copy is performed always. ``"older"`` - The last modification time of the source file must be older than that of the destination file. - If either file has no modification time, the copy is performed always. + The last modification time of the source file must be older than that + of the destination file. If either file has no modification time, the + copy is performed always. ``"exists"`` - The source file is only copied if a file of the same path already exists in ``dst_fs``. + The source file is only copied if a file of the same path already + exists in ``dst_fs``. ``"not_exists"`` - The source file is only copied if no file of the same path already exists in ``dst_fs``. + The source file is only copied if no file of the same path already + exists in ``dst_fs``. Arguments: src_fs (FS or str): Source filesystem (instance or URL). From 9f008c5a3246ad86e016431fc60a798127ffce60 Mon Sep 17 00:00:00 2001 From: atollk Date: Wed, 24 Mar 2021 19:37:43 +0100 Subject: [PATCH 7/9] Applied changes suggested by code review. --- fs/copy.py | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/fs/copy.py b/fs/copy.py index 815e8b36..22c7a562 100644 --- a/fs/copy.py +++ b/fs/copy.py @@ -53,9 +53,13 @@ def copy_fs_if_newer( workers=0, # type: int ): # type: (...) -> None - """Use ``copy_fs_if`` instead.""" + """Copy the contents of one filesystem to another, checking times. + + .. deprecated:: 2.5.0 + Use `~fs.copy_fs_if` with ``condition="newer"`` instead. + """ warnings.warn( - DeprecationWarning("copy_fs_if_newer is deprecated. Use copy_fs_if instead.") + "copy_fs_if_newer is deprecated. Use copy_fs_if instead.", DeprecationWarning ) return copy_fs_if(src_fs, dst_fs, "newer", walker, on_copy, workers) @@ -147,11 +151,14 @@ def copy_file_if_newer( dst_path, # type: Text ): # type: (...) -> bool - """Use ``copy_file_if`` instead.""" + """Copy a file from one filesystem to another, checking times. + + .. deprecated:: 2.5.0 + Use `~fs.copy_file_if` with ``condition="newer"`` instead. + """ warnings.warn( - DeprecationWarning( - "copy_file_if_newer is deprecated. Use copy_file_if instead." - ) + "copy_file_if_newer is deprecated. Use copy_file_if instead.", + DeprecationWarning, ) return copy_file_if(src_fs, src_path, dst_fs, dst_path, "newer") @@ -325,9 +332,13 @@ def copy_dir_if_newer( workers=0, # type: int ): # type: (...) -> None - """Use ``copy_dir_if`` instead.""" + """Copy a directory from one filesystem to another, checking times. + + .. deprecated:: 2.5.0 + Use `~fs.copy_dir_if` with ``condition="newer"`` instead. + """ warnings.warn( - DeprecationWarning("copy_dir_if_newer is deprecated. Use copy_dir_if instead.") + "copy_dir_if_newer is deprecated. Use copy_dir_if instead.", DeprecationWarning ) copy_dir_if(src_fs, src_path, dst_fs, dst_path, "newer", walker, on_copy, workers) @@ -371,15 +382,15 @@ def copy_dir_if( src_path (str): Path to a directory on the source filesystem. dst_fs (FS or str): Destination filesystem (instance or URL). dst_path (str): Path to a directory on the destination filesystem. - condition (str): Name of the condition to check for each file. - walker (~fs.walk.Walker, optional): A walker object that will be - used to scan for files in ``src_fs``. Set this if you only want - to consider a sub-set of the resources in ``src_fs``. - on_copy (callable):A function callback called after a single file copy - is executed. Expected signature is ``(src_fs, src_path, dst_fs, - dst_path)``. - workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for - a single-threaded copy. + condition (str): Name of the condition to check for each file. + walker (~fs.walk.Walker, optional): A walker object that will be + used to scan for files in ``src_fs``. Set this if you only want + to consider a sub-set of the resources in ``src_fs``. + on_copy (callable):A function callback called after a single file copy + is executed. Expected signature is ``(src_fs, src_path, dst_fs, + dst_path)``. + workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for + a single-threaded copy. """ on_copy = on_copy or (lambda *args: None) From e63cee23369fe7bca801ee5251bd45e1a9830c13 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Thu, 8 Apr 2021 03:21:17 +0200 Subject: [PATCH 8/9] Fix minor documentation issues in new `fs.copy` functions --- fs/copy.py | 67 ++++++++++++++---------------------------------------- 1 file changed, 17 insertions(+), 50 deletions(-) diff --git a/fs/copy.py b/fs/copy.py index 22c7a562..730a68a4 100644 --- a/fs/copy.py +++ b/fs/copy.py @@ -56,7 +56,8 @@ def copy_fs_if_newer( """Copy the contents of one filesystem to another, checking times. .. deprecated:: 2.5.0 - Use `~fs.copy_fs_if` with ``condition="newer"`` instead. + Use `~fs.copy.copy_fs_if` with ``condition="newer"`` instead. + """ warnings.warn( "copy_fs_if_newer is deprecated. Use copy_fs_if instead.", DeprecationWarning @@ -75,28 +76,6 @@ def copy_fs_if( # type: (...) -> None """Copy the contents of one filesystem to another, depending on a condition. - Depending on the value of ``strategy``, certain conditions must be fulfilled - for a file to be copied to ``dst_fs``. The following values - are supported: - - ``"always"`` - The source file is always copied. - ``"newer"`` - The last modification time of the source file must be newer than that - of the destination file. If either file has no modification time, the - copy is performed always. - ``"older"`` - The last modification time of the source file must be older than that - of the destination file. If either file has no modification time, the - copy is performed always. - ``"exists"`` - The source file is only copied if a file of the same path already - exists in ``dst_fs``. - ``"not_exists"`` - The source file is only copied if no file of the same path already - exists in ``dst_fs``. - - Arguments: src_fs (FS or str): Source filesystem (URL or instance). dst_fs (FS or str): Destination filesystem (URL or instance). @@ -110,6 +89,10 @@ def copy_fs_if( workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for a single-threaded copy. + See Also: + `~fs.copy.copy_file_if` for the full list of supported values for the + ``condition`` argument. + """ return copy_dir_if( src_fs, @@ -154,7 +137,8 @@ def copy_file_if_newer( """Copy a file from one filesystem to another, checking times. .. deprecated:: 2.5.0 - Use `~fs.copy_file_if` with ``condition="newer"`` instead. + Use `~fs.copy.copy_file_if` with ``condition="newer"`` instead. + """ warnings.warn( "copy_file_if_newer is deprecated. Use copy_file_if instead.", @@ -173,9 +157,9 @@ def copy_file_if( # type: (...) -> bool """Copy a file from one filesystem to another, depending on a condition. - Depending on the value of ``strategy``, certain conditions must be fulfilled - for a file to be copied to ``dst_fs``. The following values - are supported: + Depending on the value of ``condition``, certain requirements must + be fulfilled for a file to be copied to ``dst_fs``. The following + values are supported: ``"always"`` The source file is always copied. @@ -194,7 +178,6 @@ def copy_file_if( The source file is only copied if no file of the same path already exists in ``dst_fs``. - Arguments: src_fs (FS or str): Source filesystem (instance or URL). src_path (str): Path to a file on the source filesystem. @@ -335,7 +318,8 @@ def copy_dir_if_newer( """Copy a directory from one filesystem to another, checking times. .. deprecated:: 2.5.0 - Use `~fs.copy_dir_if` with ``condition="newer"`` instead. + Use `~fs.copy.copy_dir_if` with ``condition="newer"`` instead. + """ warnings.warn( "copy_dir_if_newer is deprecated. Use copy_dir_if instead.", DeprecationWarning @@ -356,27 +340,6 @@ def copy_dir_if( # type: (...) -> None """Copy a directory from one filesystem to another, depending on a condition. - Depending on the value of ``strategy``, certain conditions must be - fulfilled for a file to be copied to ``dst_fs``. The following values - are supported: - - ``"always"`` - The source file is always copied. - ``"newer"`` - The last modification time of the source file must be newer than that - of the destination file. If either file has no modification time, the - copy is performed always. - ``"older"`` - The last modification time of the source file must be older than that - of the destination file. If either file has no modification time, the - copy is performed always. - ``"exists"`` - The source file is only copied if a file of the same path already - exists in ``dst_fs``. - ``"not_exists"`` - The source file is only copied if no file of the same path already - exists in ``dst_fs``. - Arguments: src_fs (FS or str): Source filesystem (instance or URL). src_path (str): Path to a directory on the source filesystem. @@ -392,6 +355,10 @@ def copy_dir_if( workers (int): Use ``worker`` threads to copy data, or ``0`` (default) for a single-threaded copy. + See Also: + `~fs.copy.copy_file_if` for the full list of supported values for the + ``condition`` argument. + """ on_copy = on_copy or (lambda *args: None) walker = walker or Walker() From 2d04cca45328c178eae426b6bb4821020c907bf7 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Thu, 8 Apr 2021 03:26:42 +0200 Subject: [PATCH 9/9] Remove requests for `modified` namespace in `fs.copy._copy_is_necessary` --- fs/copy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/copy.py b/fs/copy.py index 730a68a4..03108c00 100644 --- a/fs/copy.py +++ b/fs/copy.py @@ -332,7 +332,7 @@ def copy_dir_if( src_path, # type: Text dst_fs, # type: Union[FS, Text] dst_path, # type: Text - condition="always", # type: Text + condition, # type: Text walker=None, # type: Optional[Walker] on_copy=None, # type: Optional[_OnCopy] workers=0, # type: int @@ -398,7 +398,7 @@ def _copy_is_necessary( elif condition == "newer": try: - namespace = ("details", "modified") + namespace = ("details",) src_modified = src_fs.getinfo(src_path, namespace).modified dst_modified = dst_fs.getinfo(dst_path, namespace).modified except ResourceNotFound: @@ -412,7 +412,7 @@ def _copy_is_necessary( elif condition == "older": try: - namespace = ("details", "modified") + namespace = ("details",) src_modified = src_fs.getinfo(src_path, namespace).modified dst_modified = dst_fs.getinfo(dst_path, namespace).modified except ResourceNotFound: