diff --git a/pyrax/object_storage.py b/pyrax/object_storage.py
index 29c9c449..dfd7afcf 100644
--- a/pyrax/object_storage.py
+++ b/pyrax/object_storage.py
@@ -1909,8 +1909,12 @@ def _upload(self, obj_name, content, content_type, content_encoding,
             sequence = str(segment + 1).zfill(digits)
             seg_name = "%s.%s" % (obj_name, sequence)
             with utils.SelfDeletingTempfile() as tmpname:
+                # Write the temporary file in small pieces, to be
+                # memory efficient.
                 with open(tmpname, "wb") as tmp:
-                    tmp.write(content.read(MAX_FILE_SIZE))
+                    for chunk in utils.read_in_chunks(content,
+                            max_size=MAX_FILE_SIZE):
+                        tmp.write(chunk)
                 with open(tmpname, "rb") as tmp:
                     # We have to calculate the etag for each segment
                     etag = utils.get_checksum(tmp)
diff --git a/pyrax/utils.py b/pyrax/utils.py
index 712a97c0..d7f2c776 100644
--- a/pyrax/utils.py
+++ b/pyrax/utils.py
@@ -784,3 +784,23 @@ def to_slug(value, incoming=None, errors="strict"):
 
 # For backwards compatibility, alias slugify to point to_slug
 slugify = to_slug
+
+def read_in_chunks(file_object, max_size, chunk_size=8192):
+    """
+    Yield successive chunks read from 'file_object', stopping after
+    'max_size' bytes in total, or at EOF, whichever comes first.
+    """
+    bytes_left_to_read = int(max_size)
+    chunk_size = int(chunk_size)
+    # Pick the smaller of the two values, in case max_size is smaller
+    # than the default chunk size.
+    read_size = min(bytes_left_to_read, chunk_size)
+    while read_size > 0:
+        data = file_object.read(read_size)
+        if not data:
+            break
+        # Count the bytes actually read, so that a short read does not
+        # skew the remaining total.
+        bytes_left_to_read -= len(data)
+        read_size = min(bytes_left_to_read, chunk_size)
+        yield data
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 7c19452d..94d52505 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -422,6 +422,62 @@ def test_update_exc(self):
         ret = utils.update_exc(err, msg2, before=False, separator=sep)
         self.assertEqual(ret.message, exp)
 
+    def test_read_in_chunks(self):
+        # Create a source file of random data to read back in chunks.
+        source_file = "source_file.dat"
+        target_file = "target_file.dat"
+
+        def compare_contents(source_file_name, target_file_name):
+            # Compare the contents of the target file to the first
+            # part of the source file.
+            with open(source_file_name, "rb") as source_handle, \
+                    open(target_file_name, "rb") as target_handle:
+                read_size = os.path.getsize(target_file_name)
+                target_contents = target_handle.read(read_size)
+                source_contents = source_handle.read(read_size)
+                self.assertEqual(target_contents, source_contents)
+
+        # The try block is only there to make sure the test files get
+        # deleted; using only the standard library should keep the
+        # test reliable.
+        try:
+            # Write something into the source file.
+            with open(source_file, "wb") as source:
+                source.write(os.urandom(1024))
+            # Make sure it's the size we're expecting.
+            self.assertEqual(1024, os.path.getsize(source_file))
+            # Now test different sizing cases for file consistency.
+            # Test max_size smaller than chunk_size.
+            with open(target_file, "wb") as target, \
+                    open(source_file, "rb") as source:
+                for chunk in utils.read_in_chunks(source, max_size=1,
+                        chunk_size=1024):
+                    target.write(chunk)
+            compare_contents(source_file, target_file)
+            os.unlink(target_file)
+            # Test max_size larger than chunk_size.
+            with open(target_file, "wb") as target, \
+                    open(source_file, "rb") as source:
+                for chunk in utils.read_in_chunks(source, max_size=512,
+                        chunk_size=64):
+                    target.write(chunk)
+            compare_contents(source_file, target_file)
+            os.unlink(target_file)
+            # Test max_size equal to chunk_size.
+            with open(target_file, "wb") as target, \
+                    open(source_file, "rb") as source:
+                for chunk in utils.read_in_chunks(source, max_size=512,
+                        chunk_size=512):
+                    target.write(chunk)
+            compare_contents(source_file, target_file)
+            os.unlink(target_file)
+            os.unlink(source_file)
+        finally:
+            # Remove the test files if anything was left behind.
+            if os.path.exists(source_file):
+                os.unlink(source_file)
+            if os.path.exists(target_file):
+                os.unlink(target_file)
 
 if __name__ == "__main__":
     unittest.main()
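For reviewers, here is a minimal sketch of how the new helper is meant to be
driven, mirroring the _upload() change above; the file names are hypothetical:

    # Copy at most 64 KiB from one file to another without holding the
    # whole payload in memory ("payload.bin" and "segment.bin" are
    # hypothetical names).
    from pyrax import utils

    with open("payload.bin", "rb") as source:
        with open("segment.bin", "wb") as target:
            for chunk in utils.read_in_chunks(source, max_size=64 * 1024):
                target.write(chunk)

If max_size exceeds what the source actually holds, the generator simply stops
at EOF, so callers do not need to special-case short files.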