diff --git a/.github/workflows/famedly-tests.yml b/.github/workflows/famedly-tests.yml index a328a6f4f6..fac74cdd6d 100644 --- a/.github/workflows/famedly-tests.yml +++ b/.github/workflows/famedly-tests.yml @@ -25,7 +25,7 @@ jobs: - uses: Swatinem/rust-cache@68b3cb7503c78e67dae8373749990a220eb65352 - uses: matrix-org/setup-python-poetry@v2 with: - python-version: "3.x" + python-version: "3.13" poetry-version: "2.1.1" extras: "all" - run: poetry run scripts-dev/generate_sample_config.sh --check @@ -49,7 +49,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: "3.13" - run: .ci/scripts/check_lockfile.py lint: @@ -63,6 +63,7 @@ jobs: uses: matrix-org/setup-python-poetry@v2 with: poetry-version: "2.1.1" + python-version: "3.13" install-project: "false" - name: Run ruff check @@ -91,6 +92,7 @@ jobs: # https://github.com/matrix-org/synapse/pull/15376#issuecomment-1498983775 # To make CI green, err towards caution and install the project. install-project: "true" + python-version: "3.13" poetry-version: "2.1.1" # Cribbed from @@ -124,6 +126,7 @@ jobs: - uses: matrix-org/setup-python-poetry@v2 with: poetry-version: "2.1.1" + python-version: "3.13" extras: "all" - run: poetry run scripts-dev/check_pydantic_models.py @@ -161,7 +164,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: "3.13" - run: "pip install rstcheck" - run: "rstcheck --report-level=WARNING README.rst" diff --git a/rust/src/http_client.rs b/rust/src/http_client.rs index b6cdf98f55..4ba8c438a7 100644 --- a/rust/src/http_client.rs +++ b/rust/src/http_client.rs @@ -137,7 +137,7 @@ fn get_runtime<'a>(reactor: &Bound<'a, PyAny>) -> PyResult = OnceCell::new(); /// Access to the `twisted.internet.defer` module. -fn defer(py: Python<'_>) -> PyResult<&Bound> { +fn defer(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { Ok(DEFER .get_or_try_init(|| py.import("twisted.internet.defer").map(Into::into))? .bind(py)) diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index f1566616e9..205b5a9ef5 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -20,7 +20,6 @@ # # import errno -import io import logging import os import shutil @@ -168,7 +167,6 @@ async def create_or_update_content( auth_user: UserID, media_id: Optional[str] = None, restricted: bool = False, - sha256: Optional[str] = None, ) -> MXCUri: raise NotImplementedError( "Sorry Mario, your MediaRepository related function is in another castle" @@ -764,7 +762,6 @@ async def create_or_update_content( auth_user: UserID, media_id: Optional[str] = None, restricted: bool = False, - sha256: Optional[str] = None, ) -> MXCUri: """Create or update the content of the given media ID. 
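As context for dropping the explicit `sha256` argument from `create_or_update_content`: the hunk just below derives the hash from the upload stream itself via `SHA256TransparentIOReader`, so callers no longer need to pass one in. A minimal sketch of that hash-while-reading idea, using a hypothetical `HashingReader` rather than the real class:

```python
import hashlib
import io


class HashingReader:
    """Wrap a binary stream and hash everything read from it.

    Hypothetical stand-in for SHA256TransparentIOReader; the real class in
    this patch may expose a different interface (e.g. wrap()).
    """

    def __init__(self, source: io.BytesIO) -> None:
        self._source = source
        self._hasher = hashlib.sha256()

    def read(self, size: int = -1) -> bytes:
        data = self._source.read(size)
        self._hasher.update(data)
        return data

    def hexdigest(self) -> str:
        # Only covers the bytes read so far, which is why the upload path
        # reads the stream to exhaustion before asking for the digest.
        return self._hasher.hexdigest()


reader = HashingReader(io.BytesIO(b"example media payload"))
reader.read()  # consume the whole stream first
print(reader.hexdigest())
```

The ordering constraint (read before hexdigest) is the same one the comment in the following hunk calls out.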
@@ -786,17 +783,14 @@ async def create_or_update_content( if media_id is None: media_id = random_string(24) - # if self.use_sha256_path and media_id and not sha256: - # sha256 = await self.store.get_sha_by_media_id(media_id) - - file_info = FileInfo(server_name=None, file_id=media_id, sha256=sha256) - sha256reader = SHA256TransparentIOReader(content) - data = sha256reader.read() + # Do a read() before a hexdigest(), or the hash will not be of the complete file + sha256reader.read() sha256 = sha256reader.hexdigest() + file_info = FileInfo(server_name=None, file_id=media_id, sha256=sha256) - # This implements all of IO as it has a passthrough - fname = await self.media_storage.store_file(io.BytesIO(data), file_info) + # store_file() has an inner function that does a seek(0) on the IO + fname = await self.media_storage.store_file(sha256reader.wrap(), file_info) should_quarantine = await self.store.get_is_hash_quarantined(sha256) logger.info("Stored local media in file %r", fname) @@ -806,17 +800,6 @@ async def create_or_update_content( "Media has been automatically quarantined as it matched existing quarantined media" ) - # If we enabled sha256 paths, we create a new file with sha256 path, and delete - # the original media_id path. - if self.use_sha256_path: - if not os.path.exists(self.filepaths.filepath_sha(sha256)): - file_info = FileInfo(server_name=None, file_id=media_id, sha256=sha256) - sha256_fname = await self.media_storage.store_file(content, file_info) - logger.info("Stored media in file %r", sha256_fname) - os.remove(fname) - else: - logger.info("File already exists in sha256 path.") - # Check that the user has not exceeded any of the media upload limits. # This is the total size of media uploaded by the user in the last @@ -873,9 +856,9 @@ async def create_or_update_content( await self._generate_thumbnails( server_name=None, media_id=media_id, - file_id=sha256 if self.use_sha256_path else media_id, + file_id=media_id, media_type=media_type, - sha256=sha256 if self.use_sha256_path else None, + sha256=sha256, ) except Exception as e: logger.info("Failed to generate thumbnails: %s", e) @@ -928,7 +911,6 @@ async def copy_media( old_media_info.media_length, auth_user, restricted=True, - sha256=old_media_info.sha256, ) # I could not find a place this was close()'d explicitly, but this felt prudent io_object.close() @@ -1301,6 +1283,8 @@ async def _get_remote_media_impl( # Failed to find the file anywhere, lets download it. try: + # Both of these retrieval functions will stage the file, so after they + # complete the migration function should be called below if not use_federation_endpoint: media_info = await self._download_remote_file( server_name, @@ -1342,32 +1326,26 @@ async def _get_remote_media_impl( if not media_info.media_type: media_info = attr.evolve(media_info, media_type="application/octet-stream") + # Now that the file has been retrieved, need to see if it needs to be migrated. 
+ # Should probably do this before generating thumbnails file_info = FileInfo( - server_name=server_name, - file_id=media_info.sha256 - if self.use_sha256_path and media_info.sha256 - else file_id, - sha256=media_info.sha256, + server_name=server_name, file_id=file_id, sha256=media_info.sha256 ) + await self.media_storage.maybe_move_media_file_from_id_to_sha256_path(file_info) + # We generate thumbnails even if another process downloaded the media # as a) it's conceivable that the other download request dies before it # generates thumbnails, but mainly b) we want to be sure the thumbnails # have finished being generated before responding to the client, # otherwise they'll request thumbnails and get a 404 if they're not # ready yet. - if self.use_sha256_path: - assert media_info.sha256 is not None - await self._generate_thumbnails( - server_name=server_name, - media_id=media_id, - file_id=media_info.sha256, # Passing over sha256 for file_id if sha256 path is enabled. - media_type=media_info.media_type, - sha256=media_info.sha256, - ) - else: - await self._generate_thumbnails( - server_name, media_id, file_id, media_info.media_type - ) + await self._generate_thumbnails( + server_name=server_name, + media_id=media_id, + file_id=file_id, + media_type=media_info.media_type, + sha256=media_info.sha256, + ) responder = await self.media_storage.fetch_media(file_info) return responder, media_info @@ -1474,7 +1452,7 @@ async def _download_remote_file( time_now_ms=time_now_ms, upload_name=upload_name, media_length=length, - filesystem_id=sha256 if self.use_sha256_path else file_id, + filesystem_id=file_id, sha256=sha256, ) @@ -1485,14 +1463,6 @@ async def _download_remote_file( else: authenticated = False - # If sha256 paths are enabled, rename the file to sha256 path and delete the original media_id path. - if self.use_sha256_path: - rel_path = self.filepaths.filepath_sha(sha256) - abs_path = os.path.join(self.hs.config.media.media_store_path, rel_path) - os.makedirs(os.path.dirname(abs_path), exist_ok=True) - os.rename(fname, abs_path) - os.remove(fname) - return RemoteMedia( media_origin=server_name, media_id=media_id, @@ -1500,7 +1470,7 @@ async def _download_remote_file( media_length=length, upload_name=upload_name, created_ts=time_now_ms, - filesystem_id=sha256 if self.use_sha256_path else file_id, + filesystem_id=file_id, last_access_ts=time_now_ms, quarantined_by=None, authenticated=authenticated, @@ -1642,7 +1612,7 @@ async def _federation_download_remote_file( time_now_ms=time_now_ms, upload_name=upload_name, media_length=length, - filesystem_id=sha256 if self.use_sha256_path else file_id, + filesystem_id=file_id, sha256=sha256, restricted=restricted, ) @@ -1668,14 +1638,6 @@ async def _federation_download_remote_file( else: authenticated = False - # If sha256 paths are enabled, rename the file to sha256 path and delete the original media_id path. 
- if self.use_sha256_path: - rel_path = self.filepaths.filepath_sha(sha256) - abs_path = os.path.join(self.hs.config.media.media_store_path, rel_path) - os.makedirs(os.path.dirname(abs_path), exist_ok=True) - os.rename(fname, abs_path) - os.remove(fname) - return RemoteMedia( media_origin=server_name, media_id=media_id, @@ -1683,7 +1645,7 @@ async def _federation_download_remote_file( media_length=length, upload_name=upload_name, created_ts=time_now_ms, - filesystem_id=sha256 if self.use_sha256_path else file_id, + filesystem_id=file_id, last_access_ts=time_now_ms, quarantined_by=None, authenticated=authenticated, @@ -1931,7 +1893,7 @@ async def _generate_thumbnails( file_info = FileInfo( server_name, - file_id, # This will be the sha256 if sha256 path is enabled. Otherwise, it will be the file_id. + file_id, url_cache=url_cache, sha256=sha256, ) @@ -2014,9 +1976,7 @@ async def _generate_thumbnails( file_info = FileInfo( server_name=server_name, - file_id=sha256 - if self.use_sha256_path and sha256 - else file_id, # Saving the thumbnail with sha256 path if sha256 path is enabled. + file_id=file_id, url_cache=url_cache, thumbnail=ThumbnailInfo( width=t_width, @@ -2138,22 +2098,23 @@ async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]: logger.info("Deleting: %r", key) # TODO: Should we delete from the backup store + # Jason: Probably, we literally don't delete from it anywhere else async with self.remote_media_linearizer.queue(key): - full_path = self.filepaths.remote_media_filepath(origin, file_id) - try: - os.remove(full_path) - except OSError as e: - logger.warning("Failed to remove file: %r", full_path) - if e.errno == errno.ENOENT: - pass - else: - continue + paths = [self.filepaths.remote_media_filepath(origin, file_id)] + thumbnail_paths = [ + self.filepaths.remote_media_thumbnail_dir(origin, file_id) + ] - thumbnail_dir = self.filepaths.remote_media_thumbnail_dir( - origin, file_id - ) - shutil.rmtree(thumbnail_dir, ignore_errors=True) + sha256 = await self.store.get_sha_for_remote_media_id(origin, media_id) + # In the interest of removing media that was de-duplicated, only purge + # the actual media object if there are no more references to it in the + # database. + if await self.store.get_media_reference_count_for_sha256(sha256) <= 1: + paths.append(self.filepaths.filepath_sha(sha256)) + thumbnail_paths.append(self.filepaths.thumbnail_sha_dir(sha256)) + + _remove_media_files(paths, thumbnail_paths) await self.store.delete_remote_media(origin, media_id) deleted += 1 @@ -2164,10 +2125,10 @@ async def delete_local_media_ids( self, media_ids: List[str] ) -> Tuple[List[str], int]: """ - Delete the given local or remote media ID from this server + Delete the given local media ID from this server Args: - media_id: The media ID to delete. + media_ids: The list of media IDs to delete. Returns: A tuple of (list of deleted media IDs, total deleted media IDs). """ @@ -2182,7 +2143,7 @@ async def delete_old_local_media( delete_protected_media: bool = False, ) -> Tuple[List[str], int]: """ - Delete local or remote media from this server by size and timestamp. Removes + Delete local media from this server by size and timestamp. Removes media files, any thumbnails and cached URLs. Args: @@ -2210,7 +2171,7 @@ async def _remove_local_media_from_disk( self, media_ids: List[str] ) -> Tuple[List[str], int]: """ - Delete local or remote media from this server. Removes media files, + Delete local media from this server. Removes media files, any thumbnails and cached URLs. 
Args: @@ -2221,31 +2182,53 @@ async def _remove_local_media_from_disk( removed_media = [] for media_id in media_ids: logger.info("Deleting media with ID '%s'", media_id) - sha256 = await self.store.get_sha_by_media_id(media_id, None) paths = [ self.filepaths.local_media_filepath(media_id), - self.filepaths.filepath_sha(sha256), ] - for path in paths: - try: - os.remove(path) - except OSError as e: - logger.warning("Failed to remove file: %r: %s", path, e) - if e.errno == errno.ENOENT: - pass - else: - continue + thumbnail_paths = [self.filepaths.local_media_thumbnail_dir(media_id)] + + sha256 = await self.store.get_sha_for_local_media_id(media_id) + # In the interest of removing media that was de-duplicated, only purge + # the actual media object if there are no more references to it in the + # database. + if await self.store.get_media_reference_count_for_sha256(sha256) <= 1: + paths.append(self.filepaths.filepath_sha(sha256)) + thumbnail_paths.append(self.filepaths.thumbnail_sha_dir(sha256)) - thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id) - thumbnail_dir_sha = self.filepaths.thumbnail_sha_dir(sha256) - shutil.rmtree(thumbnail_dir, ignore_errors=True) - shutil.rmtree(thumbnail_dir_sha, ignore_errors=True) + _remove_media_files(paths, thumbnail_paths) + # Poorly named, this deletes media *from this server* that is somehow + # considered "remote". Observe the server name argument below await self.store.delete_remote_media(self.server_name, media_id) await self.store.delete_url_cache((media_id,)) + # Poorly named, but this is the one that cleans out local media metadata await self.store.delete_url_cache_media((media_id,)) removed_media.append(media_id) return removed_media, len(removed_media) + + +def _remove_media_files(paths: List[str], thumbnail_paths: List[str]) -> None: + """ + Remove the select files from the filesystem. IO related errors, such as non-existent + files, will be ignored. + + Args: + paths: The absolute path to a specific file to be removed + thumbnail_paths: The absolute path to a specific directory. The entire directory + will be removed. + """ + for path in paths: + try: + os.remove(path) + except OSError as e: + logger.warning("Failed to remove file: %r: %s", path, e) + if e.errno == errno.ENOENT: + pass + else: + continue + + for thumbnail_path in thumbnail_paths: + shutil.rmtree(thumbnail_path, ignore_errors=True) diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py index c67d3f72d1..8678ec2259 100644 --- a/synapse/media/media_storage.py +++ b/synapse/media/media_storage.py @@ -226,7 +226,7 @@ async def store_into_file( # .. write into f ... """ # With `use_sha256_path` enabled, new files are stored with SHA256 paths. - path = self._file_info_to_path(file_info) + path = self.resolve_path_to_media_file(file_info) fname = os.path.join(self.local_media_directory, path) dirname = os.path.dirname(fname) os.makedirs(dirname, exist_ok=True) @@ -251,9 +251,12 @@ async def store_into_file( # the spam-check API. raise SpamMediaException(errcode=spam_check[0]) + # Unlike the local cache, the storage providers are on their own for + # de-duplicating media. 
They get a relative path to work with + relative_legacy_path = self._file_info_to_path(file_info) for provider in self.storage_providers: with start_active_span(str(provider)): - await provider.store_file(path, file_info) + await provider.store_file(relative_legacy_path, file_info) except Exception as e: try: @@ -273,10 +276,7 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: Returns: Returns a Responder if the file was found, otherwise None. """ - paths = [self._file_info_to_path(file_info, force_sha256=False)] - - if self.use_sha256_path: - paths.append(self._file_info_to_path(file_info)) + paths = [self.resolve_path_to_media_file(file_info)] # fallback for remote thumbnails with no method in the filename if file_info.thumbnail and file_info.server_name: @@ -297,6 +297,10 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: return FileResponder(self.hs, open(local_path, "rb")) logger.debug("local file %s did not exist", local_path) + # Unlike the local cache, the storage providers are on their own for + # de-duplicating media. They get a relative path to work with + relative_legacy_path = self._file_info_to_path(file_info) + paths = [relative_legacy_path] for provider in self.storage_providers: for path in paths: res: Any = await provider.fetch(path, file_info) @@ -318,14 +322,7 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: Returns: Full path to local file """ - # When `use_sha256_path` is enabled, we first check for the SHA256 path. - if self.use_sha256_path and not file_info.url_cache: - path = self._file_info_to_path(file_info) - local_path = os.path.join(self.local_media_directory, path) - if os.path.exists(local_path): - return local_path - - path = self._file_info_to_path(file_info, force_sha256=False) + path = self.resolve_path_to_media_file(file_info) local_path = os.path.join(self.local_media_directory, path) if os.path.exists(local_path): return local_path @@ -347,8 +344,11 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: dirname = os.path.dirname(local_path) os.makedirs(dirname, exist_ok=True) + # Unlike the local cache, the storage providers are on their own for + # de-duplicating media. They get a relative path to work with + relative_legacy_path = self._file_info_to_path(file_info) for provider in self.storage_providers: - res: Any = await provider.fetch(path, file_info) + res: Any = await provider.fetch(relative_legacy_path, file_info) if res: with res: consumer = BackgroundFileConsumer( @@ -361,9 +361,7 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: raise NotFoundError() @trace - def _file_info_to_path( - self, file_info: FileInfo, force_sha256: Optional[bool] = None - ) -> str: + def _file_info_to_path(self, file_info: FileInfo) -> str: """ Converts file_info to a relative path. @@ -372,22 +370,7 @@ def _file_info_to_path( Args: file_info: File information - force_sha256: If True, force SHA256 path; if False, force media_id path, - if None, use self.use_sha256_path flag. 
""" - use_sha256 = force_sha256 if force_sha256 is not None else self.use_sha256_path - if use_sha256 and file_info.sha256 and not file_info.url_cache: - if file_info.thumbnail: - return self.filepaths.thumbnail_sha_rel( - sha256=file_info.sha256, - width=file_info.thumbnail.width, - height=file_info.thumbnail.height, - content_type=file_info.thumbnail.type, - method=file_info.thumbnail.method, - ) - return self.filepaths.filepath_sha_rel(file_info.sha256) - - # Fall back to media_id-based paths if file_info.url_cache: if file_info.thumbnail: return self.filepaths.url_cache_thumbnail_rel( @@ -423,6 +406,130 @@ def _file_info_to_path( ) return self.filepaths.local_media_filepath_rel(file_info.file_id) + def _file_info_to_sha256_path(self, file_info: FileInfo) -> str: + """ + Resolve the path based on the sha256 hash. This does not work for the url cache. + + Returns: + The relative path to where the file should be + """ + # There should not be a case where a url preview should be cached in sha256 + assert not file_info.url_cache, "Can not use sha256 paths with url caching" + + assert file_info.sha256 is not None, ( + "Can not resolve a path to a sha256 object without a hash" + ) + if file_info.thumbnail: + return self.filepaths.thumbnail_sha_rel( + sha256=file_info.sha256, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, + ) + return self.filepaths.filepath_sha_rel(file_info.sha256) + + def resolve_path_to_media_file(self, file_info: FileInfo) -> str: + """ + Fully resolve the filesystem path to the media object. Allow for backwards + compatibility with existing media_id-based file names. + + Arguments: + file_info: The FileInfo object that contains the data for looking up the + file path + Returns: + The fully resolved, relative path to the file request. + """ + + media_id_path = self._file_info_to_path(file_info) + + if not file_info.sha256 or file_info.url_cache: + return media_id_path + sha256_path = self._file_info_to_sha256_path(file_info) + + media_id_abs_path = os.path.join(self.local_media_directory, media_id_path) + media_id_file_exists = os.path.exists(media_id_abs_path) + + sha256_abs_path = os.path.join(self.local_media_directory, sha256_path) + sha256_file_exists = os.path.exists(sha256_abs_path) + + logger.debug( + "Verifying media file exists at path %s: %r", + media_id_abs_path, + media_id_file_exists, + ) + logger.debug( + "Verifying media file exists at sha256 path %s: %r", + sha256_abs_path, + sha256_file_exists, + ) + + # When `use_sha256_path` is not enabled, but might have been in the past, use + # that file if it already exists. Perhaps the admin changed their mind? + if sha256_file_exists: + return sha256_path + if media_id_file_exists: + return media_id_path + return sha256_path if self.use_sha256_path else media_id_path + + async def maybe_move_media_file_from_id_to_sha256_path( + self, file_info: FileInfo + ) -> None: + """ + Move a media file from a legacy "media_id" based path to a "sha256" based path. + This is most appropriate after using `store_into_file()` for federation + downloaded media. + """ + # Check the old media id path for the file, then check the sha256 path. + # If media exists on the old path, and use_sha256_path is enabled, move + # the file. 
Migration is one way + + if not self.use_sha256_path: + logger.debug("Can not move media file if de-deduplication turned off") + return + if not file_info.sha256: + logger.warning( + "Sha256 data on media/file id '%s' is missing. Skipping file move", + file_info.file_id, + ) + return + + media_id_path = self._file_info_to_path(file_info) + media_id_abs_path = os.path.join(self.local_media_directory, media_id_path) + media_id_file_exists = os.path.exists(media_id_abs_path) + logger.debug( + "Verifying media file exists at path %s: %r", + media_id_abs_path, + media_id_file_exists, + ) + + sha256_path = self._file_info_to_sha256_path(file_info) + sha256_abs_path = os.path.join(self.local_media_directory, sha256_path) + sha256_file_exists = os.path.exists(sha256_abs_path) + logger.debug( + "Verifying media file exists at sha256 path %s: %r", + sha256_abs_path, + sha256_file_exists, + ) + + if media_id_file_exists and not sha256_file_exists: + os.makedirs(os.path.dirname(sha256_abs_path), exist_ok=True) + # os.rename() should be an atomic operation + os.rename(media_id_abs_path, sha256_abs_path) + logger.debug( + "Moved file from old media id path to new sha256 path: %s", + sha256_abs_path, + ) + + # Having `use_sha256_path` enabled is our signal that the server prefers using + # the newer path style + if media_id_file_exists and sha256_file_exists: + # Probably should clean up the old duplicate? Do we want a feature flag? + os.remove(media_id_abs_path) + logger.debug( + "Both paths existed, deleting old media id path: %s", media_id_abs_path + ) + @trace def _write_file_synchronously(source: IO, dest: IO) -> None: diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py index 914d096355..35c84834b4 100644 --- a/synapse/media/thumbnailer.py +++ b/synapse/media/thumbnailer.py @@ -392,6 +392,7 @@ async def select_or_generate_local_thumbnail( file_id=media_id, url_cache=bool(media_info.url_cache), thumbnail=info, + sha256=media_info.sha256, ) responder = await self.media_storage.fetch_media(file_info) @@ -507,6 +508,7 @@ async def select_or_generate_remote_thumbnail( server_name=server_name, file_id=file_id, thumbnail=info, + sha256=media_info.sha256, ) responder = await self.media_storage.fetch_media(file_info) @@ -527,7 +529,7 @@ async def select_or_generate_remote_thumbnail( desired_height, desired_method, desired_type, - media_info.sha256 if self.use_sha256_path else None, + media_info.sha256, ) if file_path: @@ -596,9 +598,7 @@ async def respond_remote_thumbnail( url_cache=False, server_name=server_name, for_federation=False, - sha256=media_info.sha256 - if self.use_sha256_path and media_info.sha256 - else None, + sha256=media_info.sha256, ) async def _select_and_respond_with_thumbnail( @@ -658,7 +658,7 @@ async def _select_and_respond_with_thumbnail( file_id, url_cache, server_name, - sha256=sha256 if self.use_sha256_path and sha256 else None, + sha256=sha256, ) if not file_info: logger.info("Couldn't find a thumbnail matching the desired inputs") diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index 22fe8622c7..6d794dec98 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -1309,8 +1309,9 @@ def set_media_restricted_to_user_profile_txn( json_dict=json_object, ) - async def get_sha_by_media_id( - self, media_id: str, server_name: Optional[str] = None + async def get_sha_for_local_media_id( + self, + media_id: str, ) -> str: """ Get the 
media ID of and return SHA256 hash of given media. @@ -1322,22 +1323,39 @@ async def get_sha_by_media_id( LIMIT 1 """ - if server_name and not self.hs.is_mine_server_name(server_name): - sql = """ - SELECT sha256 - FROM remote_media_cache - WHERE media_origin = ? AND media_id = ? - ORDER BY created_ts ASC - LIMIT 1 - """ + def _get_sha_for_local_media_id_txn( + txn: LoggingTransaction, + ) -> str: + txn.execute(sql, (media_id,)) + + rows = txn.fetchone() + if not rows: + raise ValueError("media_id %s has no sha256 field", media_id) + return rows[0] + + return await self.db_pool.runInteraction( + "get_sha_for_local_media_id", _get_sha_for_local_media_id_txn + ) + + async def get_sha_for_remote_media_id(self, server_name: str, media_id: str) -> str: + """ + Get the media ID of and return SHA256 hash of given media. + """ + assert not self.hs.is_mine_server_name(server_name), ( + "Can not run against own server" + ) + sql = """ + SELECT sha256 + FROM remote_media_cache + WHERE media_origin = ? AND media_id = ? + ORDER BY created_ts ASC + LIMIT 1 + """ - def _get_sha_by_media_id_txn( + def _get_sha_for_remote_media_id_txn( txn: LoggingTransaction, ) -> str: - if server_name and not self.hs.is_mine_server_name(server_name): - txn.execute(sql, (server_name, media_id)) - else: - txn.execute(sql, (media_id,)) + txn.execute(sql, (server_name, media_id)) rows = txn.fetchone() if not rows: @@ -1345,5 +1363,29 @@ def _get_sha_by_media_id_txn( return rows[0] return await self.db_pool.runInteraction( - "get_sha_by_media_id", _get_sha_by_media_id_txn + "get_sha_for_remote_media_id", _get_sha_for_remote_media_id_txn + ) + + async def get_media_reference_count_for_sha256(self, sha256: str) -> int: + """Get total number of local + remote media entries for this SHA256.""" + + def _get_reference_count_txn(txn: LoggingTransaction) -> int: + sql = """ + SELECT SUM(total_rows) AS grand_total_rows + FROM ( + SELECT COUNT(*) AS total_rows FROM local_media_repository WHERE sha256 = ? + UNION ALL + SELECT COUNT(*) AS total_rows FROM remote_media_cache WHERE sha256 = ? + ) AS combined_counts + """ + + txn.execute(sql, (sha256, sha256)) + row_tuple = txn.fetchone() + if row_tuple is not None: + (row,) = row_tuple + return row + return 0 + + return await self.db_pool.runInteraction( + "get_media_reference_count_for_sha256", _get_reference_count_txn ) diff --git a/tests/media/test_media_repository.py b/tests/media/test_media_repository.py index 996e521af1..f5610fe521 100644 --- a/tests/media/test_media_repository.py +++ b/tests/media/test_media_repository.py @@ -76,47 +76,107 @@ def _store_media_with_path(self, file_info: FileInfo, expected_path: str) -> Non assert os.path.exists(fname), f"File does not exist: {fname}" def _create_local_media_with_media_id_path(self, user: str) -> MXCUri: + """ + Force creation of a media object at the specific place based on the media id + path. 
This does assert that the file exists where it is expected to be + """ assert isinstance(self.repo, MediaRepository) - self.repo.use_sha256_path = False - mxc_uri = self.get_success( - self.repo.create_or_update_content( + media_id = random_string(24) + # Curate a specialized FileInfo that is lacking sha256 data, then file will be + # forced to the old media_id path + file_info = FileInfo( + server_name=None, + file_id=media_id, + ) + + expected_path = self.repo.filepaths.local_media_filepath(media_id) + ctx = self.repo.media_storage.store_into_file(file_info) + (f, fname) = self.get_success(ctx.__aenter__()) + f.write(SMALL_PNG) + self.get_success(ctx.__aexit__(None, None, None)) + + # Insert the appropriate data into the database, so lookups work as expected + self.get_success( + self.repo.store.store_local_media( + media_id=media_id, media_type="image/png", + time_now_ms=self.clock.time_msec(), upload_name="original_media", - content=io.BytesIO(SMALL_PNG), - content_length=67, - auth_user=UserID.from_string(user), - media_id=None, - restricted=True, + media_length=67, + user_id=UserID.from_string(user), + sha256=SMALL_PNG_SHA256, + quarantined_by=None, + restricted=False, ) ) - media_id = mxc_uri.media_id + assert expected_path == fname + assert os.path.exists(fname), f"File does not exist: {fname}" + assert os.path.exists(expected_path), f"File does not exist: {expected_path}" + + self.get_success( + self.repo._generate_thumbnails( + server_name=None, + media_id=media_id, + file_id=media_id, + media_type="image/png", + # We purposely do not use the sha256 here, as it directly causes the sha256 + # path for thumbnails to be populated, and that is not what we are looking + # for. + sha256=None, + ) + ) + return MXCUri(server_name=self.hs.config.server.server_name, media_id=media_id) + + def _create_local_media_with_sha256_path(self, user: str) -> MXCUri: + assert isinstance(self.repo, MediaRepository) + media_id = random_string(24) + # Curate a specialized FileInfo that includes sha256 data, then file will be + # forced to the new sha256 path file_info = FileInfo( server_name=None, file_id=media_id, + sha256=SMALL_PNG_SHA256, ) - assert isinstance(self.repo, MediaRepository) - expected_path = self.repo.filepaths.local_media_filepath(media_id) - self._store_media_with_path(file_info, expected_path) - assert os.path.exists(expected_path), f"File does not exist: {expected_path}" - self.repo.use_sha256_path = True - return mxc_uri - def _create_local_media_with_sha256_path(self, user: str) -> MXCUri: - expected_path = os.path.join("media", SMALL_PNG_SHA256_PATH) - mxc_uri = self.get_success( - self.repo.create_or_update_content( + expected_path = self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256) + ctx = self.repo.media_storage.store_into_file(file_info) + (f, fname) = self.get_success(ctx.__aenter__()) + f.write(SMALL_PNG) + self.get_success(ctx.__aexit__(None, None, None)) + + # Insert the appropriate data into the database, so lookups work as expected + self.get_success( + self.repo.store.store_local_media( + media_id=media_id, media_type="image/png", - upload_name="test_png_upload", - content=io.BytesIO(SMALL_PNG), - content_length=67, - auth_user=UserID.from_string(user), - media_id=None, - restricted=True, + time_now_ms=self.clock.time_msec(), + upload_name="original_media", + media_length=67, + user_id=UserID.from_string(user), sha256=SMALL_PNG_SHA256, + quarantined_by=None, + restricted=False, ) ) + assert expected_path == fname + assert os.path.exists(fname), f"File does not exist: 
{fname}" assert os.path.exists(expected_path), f"File does not exist: {expected_path}" - return mxc_uri + + # Some tests expect thumbnails, remember to generate them + self.get_success( + self.repo._generate_thumbnails( + server_name=None, + media_id=media_id, + file_id=media_id, + media_type="image/png", + # We purposely do not use the sha256 here, as it directly causes the sha256 + # path for thumbnails to be populated, and that is not what we are looking + # for. + sha256=SMALL_PNG_SHA256, + ) + ) + + return MXCUri(server_name=self.hs.config.server.server_name, media_id=media_id) def _create_remote_media_with_media_id_path(self, server_name: str) -> str: media_id = random_string(24) @@ -213,6 +273,10 @@ def test_create_or_update_content_creates_new_content_with_sha256_path( def test_create_or_update_content_updates_content_with_media_id_path(self) -> None: """Test that `create_or_update_content` function can update existing media with media_id path""" + # Strictly speaking, this is not an operation that is supposed to be supported, + # but is currently possible. In the case it becomes necessary, the behavior + # should not be unexpected. + # Create media with media_id path mxc_uri = self._create_local_media_with_media_id_path(self.creator) media_id = mxc_uri.media_id @@ -230,13 +294,15 @@ def test_create_or_update_content_updates_content_with_media_id_path(self) -> No ) ) assert mxc_uri.media_id == updated_mxc_uri.media_id - assert os.path.exists(os.path.join("media", SMALL_PNG_SHA256_PATH)) - # TODO: update removes the file with media_id path, and replace it with the new file with sha256 path. assert isinstance(self.repo, MediaRepository) - assert not os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) + assert os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) def test_create_or_update_content_updates_content_with_sha256_path(self) -> None: """Test that `create_or_update_content` function can update existing media with sha256 path""" + # Strictly speaking, this is not an operation that is supposed to be supported, + # but is currently possible. In the case it becomes necessary, the behavior + # should not be unexpected. 
+ # Create media with sha256 path mxc_uri = self._create_local_media_with_sha256_path(self.creator) media_id = mxc_uri.media_id @@ -251,7 +317,6 @@ def test_create_or_update_content_updates_content_with_sha256_path(self) -> None auth_user=UserID.from_string(self.creator), media_id=media_id, restricted=True, - sha256=SMALL_PNG_SHA256, ) ) assert mxc_uri.media_id == updated_mxc_uri.media_id @@ -331,7 +396,7 @@ def test_get_local_media_with_sha256_path(self) -> None: def test_get_remote_media_impl_with_sha256_path_cache_hit(self) -> None: server_name = "other_server.com" - # Generate remote media with media_id path + # Generate remote media with sha256 path media_id = self._create_remote_media_with_sha256_path(server_name) assert isinstance(self.repo, MediaRepository) diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py index 42504068bd..bffcd7b1b6 100644 --- a/tests/media/test_media_storage.py +++ b/tests/media/test_media_storage.py @@ -18,6 +18,7 @@ # [This file includes modifications made by New Vector Limited] # # +import hashlib import os import shutil import tempfile @@ -66,10 +67,15 @@ from tests.utils import default_config +@parameterized_class(("use_dedupe",), [(True,), (False,)]) class MediaStorageTests(unittest.HomeserverTestCase): + use_dedupe: bool needs_threadpool = True def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + """ + "enable_local_media_storage_deduplication": False, + """ self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-") self.addCleanup(shutil.rmtree, self.test_dir) @@ -77,6 +83,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.secondary_base_path = os.path.join(self.test_dir, "secondary") hs.config.media.media_store_path = self.primary_base_path + hs.config.media.enable_local_media_storage_deduplication = self.use_dedupe storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)] @@ -85,24 +92,73 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: hs, self.primary_base_path, self.filepaths, storage_providers ) - def test_ensure_media_is_in_local_cache(self) -> None: - media_id = "some_media_id" - test_body = "Test\n" + self.media_id = "some_media_id" + self.test_body = b"Test\n" + self.sha256 = hashlib.sha256(self.test_body).hexdigest() + self.file_info = FileInfo(None, self.media_id, sha256=self.sha256) - # First we create a file that is in a storage provider but not in the - # local primary media store - rel_path = self.filepaths.local_media_filepath_rel(media_id) + self.expected_relative_media_id_path_fragment = ( + self.filepaths.local_media_filepath_rel(self.media_id) + ) + self.expected_relative_sha256_path_fragment = self.filepaths.filepath_sha_rel( + self.sha256 + ) + + def write_media_to_storage_provider(self) -> str: + """Write a media file to the configured storage provider""" + # Storage providers only get to work with the provided relative directory + rel_path = self.expected_relative_media_id_path_fragment secondary_path = os.path.join(self.secondary_base_path, rel_path) os.makedirs(os.path.dirname(secondary_path)) - with open(secondary_path, "w") as f: - f.write(test_body) + with open(secondary_path, "wb") as f: + f.write(self.test_body) - # Now we run ensure_media_is_in_local_cache, which should copy the file - # to the local cache. 
- file_info = FileInfo(None, media_id) + return secondary_path + + def write_media_to_local_media_cache(self) -> str: + """Write a media file to the local media cache directory""" + if self.use_dedupe: + rel_path = self.expected_relative_sha256_path_fragment + else: + rel_path = self.expected_relative_media_id_path_fragment + primary_path = os.path.join(self.primary_base_path, rel_path) + + os.makedirs(os.path.dirname(primary_path)) + with open(primary_path, "wb") as f: + f.write(self.test_body) + + return primary_path + + def write_media_to_local_media_cache_fallback(self) -> str: + """ + Unlike write_media_to_local_media_cache() above, this writes it to the opposite + path that was expected. So if de-duplication is not enabled, it is written to + the media ID path instead. This allows testing of flipping the switch + """ + if not self.use_dedupe: + rel_path = self.expected_relative_sha256_path_fragment + else: + rel_path = self.expected_relative_media_id_path_fragment + primary_path = os.path.join(self.primary_base_path, rel_path) + + os.makedirs(os.path.dirname(primary_path)) + + with open(primary_path, "wb") as f: + f.write(self.test_body) + + return primary_path + + def run_ensure_media_is_in_local_cache_to_completion( + self, file_info: FileInfo + ) -> str: + """ + ensure_media_is_in_local_cache() runs on a dedicated thread pool, which is + normally disabled during testing. This TestCase has it turned on, so care must + be taken to make sure it finishes. + """ # This uses a real blocking threadpool so we have to wait for it to be # actually done :/ x = defer.ensureDeferred( @@ -112,7 +168,22 @@ def test_ensure_media_is_in_local_cache(self) -> None: # Hotloop until the threadpool does its job... self.wait_on_thread(x) - local_path = self.get_success(x) + return self.get_success(x) + + def test_ensure_media_is_in_local_cache(self) -> None: + """ + Test ensure_media_is_in_local_cache() behavior when copying media to the local + cache + """ + # First we create a file that is in a storage provider but not in the + # local primary media store + self.write_media_to_storage_provider() + + # Now we run ensure_media_is_in_local_cache(), which should copy the file + # to the local cache. 
+ local_path = self.run_ensure_media_is_in_local_cache_to_completion( + self.file_info + ) self.assertTrue(os.path.exists(local_path)) @@ -122,10 +193,55 @@ def test_ensure_media_is_in_local_cache(self) -> None: self.primary_base_path, ) - with open(local_path) as f: + with open(local_path, "rb") as f: body = f.read() - self.assertEqual(test_body, body) + self.assertEqual(self.test_body, body) + + def test_fetch_media_from_local_media_cache(self) -> None: + """Tests that fetch_media() can retrieve media from any local media cache path""" + # First we create a file that is in the local primary media store based on the + # path dictated by the de-duplication setting + self.write_media_to_local_media_cache() + # Now we run fetch_media(), which should return a responder with the file + responder = self.get_success(self.media_storage.fetch_media(self.file_info)) + assert responder is not None + assert isinstance(responder, FileResponder) + # Use the context manager, so it will close the responder for us + with responder: + responder.open_file.seek(0) + content = responder.open_file.read() + assert content == self.test_body + + def test_fetch_media_from_local_media_cache_fallback(self) -> None: + """Test that if de-duplication is turned on and off, the media is still usable""" + # First we create a file that is in the local primary media store based on the + # inverse of the path dictated by the de-duplication setting + self.write_media_to_local_media_cache_fallback() + # Now we run fetch_media(), which should return a responder with the file + responder = self.get_success(self.media_storage.fetch_media(self.file_info)) + assert responder is not None + assert isinstance(responder, FileResponder) + # Use the context manager, so it will close the responder for us + with responder: + responder.open_file.seek(0) + content = responder.open_file.read() + assert content == self.test_body + + def test_fetch_media_from_storage_provider(self) -> None: + """Test that media in the storage provider can be served back directly""" + # First we create a file that is in a storage provider but not in the + # local primary media store + self.write_media_to_storage_provider() + # Now we run fetch_media(), which should return a responder with the file + responder = self.get_success(self.media_storage.fetch_media(self.file_info)) + assert responder is not None + assert isinstance(responder, FileResponder) + # Use the context manager, so it will close the responder for us + with responder: + responder.open_file.seek(0) + content = responder.open_file.read() + assert content == self.test_body @attr.s(auto_attribs=True, slots=True, frozen=True) @@ -419,15 +535,6 @@ def _req( return channel - def _store_media_with_path(self, file_info: FileInfo, expected_path: str) -> None: - ctx = self.media_storage.store_into_file(file_info) - (f, fname) = self.get_success(ctx.__aenter__()) - f.write(SMALL_PNG) - self.get_success(ctx.__aexit__(None, None, None)) - - assert expected_path in fname - assert os.path.exists(fname), f"File does not exist: {fname}" - @unittest.override_config( { "enable_authenticated_media": False, @@ -864,41 +971,6 @@ def test_unknown_v3_endpoint(self) -> None: self.pump() self.assertEqual(channel.code, 200) - @unittest.override_config( - { - "enable_authenticated_media": False, - "enable_local_media_storage_deduplication": True, - } - ) - def test_ensure_media_storage_is_compatible_with_sha256_path(self) -> None: - """Test that `ensure_media_is_in_local_cache` and `fetch_media` works with sha256 path.""" - 
# Create local media with sha256 path. - expected_path = self.media_storage.filepaths.filepath_sha_rel( - sha256=SMALL_PNG_SHA256, - ) - file_info = FileInfo( - server_name="example.com", - file_id=SMALL_PNG_SHA256, - url_cache=False, - sha256=SMALL_PNG_SHA256, - ) - self._store_media_with_path(file_info, expected_path) - - # Check if `ensure_media_is_in_local_cache` can find the media. - local_path = self.get_success( - self.media_storage.ensure_media_is_in_local_cache(file_info) - ) - assert expected_path in local_path - assert os.path.exists(local_path), f"File does not exist: {local_path}" - - # Check if `fetch_media` can fetch the media. - responder = self.get_success(self.media_storage.fetch_media(file_info)) - assert responder is not None - assert isinstance(responder, FileResponder) - responder.open_file.seek(0) - content = responder.open_file.read() - assert content == SMALL_PNG - class TestSpamCheckerLegacy: """A spam checker module that rejects all media that includes the bytes diff --git a/tests/rest/client/test_media.py b/tests/rest/client/test_media.py index 12882b7081..b25861022a 100644 --- a/tests/rest/client/test_media.py +++ b/tests/rest/client/test_media.py @@ -5162,39 +5162,60 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: def _create_local_media_with_sha256_path(self) -> str: assert isinstance(self.repo, MediaRepository) + media_id = random_string(24) + # Curate a specialized FileInfo that includes sha256 data, then file will be + # forced to the new sha256 path + file_info = FileInfo( + server_name=None, + file_id=media_id, + sha256=SMALL_PNG_SHA256, + ) + expected_path = self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256) - mxc_uri = self.get_success( - self.repo.create_or_update_content( + ctx = self.repo.media_storage.store_into_file(file_info) + (f, fname) = self.get_success(ctx.__aenter__()) + f.write(SMALL_PNG) + self.get_success(ctx.__aexit__(None, None, None)) + + # Insert the appropriate data into the database, so lookups work as expected + self.get_success( + self.repo.store.store_local_media( + media_id=media_id, media_type="image/png", - upload_name="test_png_upload", - content=io.BytesIO(SMALL_PNG), - content_length=67, - auth_user=UserID.from_string(self.user), - media_id=None, - restricted=True, + time_now_ms=self.clock.time_msec(), + upload_name="original_media", + media_length=67, + user_id=UserID.from_string(self.user), + sha256=SMALL_PNG_SHA256, + quarantined_by=None, + restricted=False, ) ) + assert expected_path == fname + assert os.path.exists(fname), f"File does not exist: {fname}" assert os.path.exists(expected_path), f"File does not exist: {expected_path}" - assert not os.path.exists( - self.repo.filepaths.local_media_filepath(mxc_uri.media_id) - ) - return mxc_uri.media_id - def _create_local_media_with_media_id_path(self) -> str: - assert isinstance(self.repo, MediaRepository) - self.repo.use_sha256_path = False - mxc_uri = self.get_success( - self.repo.create_or_update_content( + # Some tests expect thumbnails, remember to generate them + self.get_success( + self.repo._generate_thumbnails( + server_name=None, + media_id=media_id, + file_id=media_id, media_type="image/png", - upload_name="original_media", - content=io.BytesIO(SMALL_PNG), - content_length=67, - auth_user=UserID.from_string(self.user), - media_id=None, - restricted=True, + # We purposely do not use the sha256 here, as it directly causes the sha256 + # path for thumbnails to be populated, and that is not what we are looking + # for. 
+ sha256=SMALL_PNG_SHA256, ) ) - media_id = mxc_uri.media_id + + return media_id + + def _create_local_media_with_media_id_path(self) -> str: + assert isinstance(self.repo, MediaRepository) + media_id = random_string(24) + # Curate a specialized FileInfo that is lacking sha256 data, then file will be + # forced to the old media_id path file_info = FileInfo( server_name=None, file_id=media_id, @@ -5206,10 +5227,37 @@ def _create_local_media_with_media_id_path(self) -> str: f.write(SMALL_PNG) self.get_success(ctx.__aexit__(None, None, None)) + # Insert the appropriate data into the database, so lookups work as expected + self.get_success( + self.repo.store.store_local_media( + media_id=media_id, + media_type="image/png", + time_now_ms=self.clock.time_msec(), + upload_name="original_media", + media_length=67, + user_id=UserID.from_string(self.user), + sha256=SMALL_PNG_SHA256, + quarantined_by=None, + restricted=False, + ) + ) assert expected_path == fname assert os.path.exists(fname), f"File does not exist: {fname}" assert os.path.exists(expected_path), f"File does not exist: {expected_path}" - self.repo.use_sha256_path = True + + # Some tests expect thumbnails, remember to generate them + self.get_success( + self.repo._generate_thumbnails( + server_name=None, + media_id=media_id, + file_id=media_id, + media_type="image/png", + # We purposely do not use the sha256 here, as it directly causes the sha256 + # path for thumbnails to be populated, and that is not what we are looking + # for. + sha256=None, + ) + ) return media_id async def _mock_federation_download_media( @@ -5263,11 +5311,13 @@ def test_download_remote_media_saves_file_in_sha256_path(self) -> None: ) ) - def test_download_local_media_with_media_id_path_saves_file_in_sha256_path( + def test_download_local_media_with_media_id_path( self, ) -> None: - """Test that downloading local media with media_id path saves file in sha256 path.""" - # Create local media with media_id path. + """Test that downloading local media with media_id path serves correct file.""" + # Specifically, that a preexisting file on the legacy media paths can still be + # served from its present location. + media_id = self._create_local_media_with_media_id_path() channel = self.make_request( "GET", @@ -5277,12 +5327,9 @@ def test_download_local_media_with_media_id_path_saves_file_in_sha256_path( ) assert channel.code == 200 assert channel.result["body"] == SMALL_PNG - assert isinstance(self.repo, MediaRepository) - # Downloading local media with media_id path doesn't recreate the file in the sha256 path. 
- assert os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) def test_download_local_media_with_sha256_path(self) -> None: - """Test that downloading local media with sha256 path saves file in sha256 path.""" + """Test that downloading local media with sha256 path serves correct file.""" media_id = self._create_local_media_with_sha256_path() channel = self.make_request( "GET", @@ -5292,9 +5339,6 @@ def test_download_local_media_with_sha256_path(self) -> None: ) assert channel.code == 200 assert channel.result["body"] == SMALL_PNG - assert isinstance(self.repo, MediaRepository) - assert os.path.exists(self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256)) - assert not os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) def test_copy_remote_media_with_sha256_path(self) -> None: """Test that copying remote media saves file in sha256 path.""" @@ -5323,7 +5367,7 @@ def test_copy_remote_media_with_sha256_path(self) -> None: ) assert remote_media is not None assert remote_media.sha256 == SMALL_PNG_SHA256 - assert remote_media.filesystem_id == SMALL_PNG_SHA256 + copied_media = self.get_success( self.repo.store.get_local_media(copied_media_mxc_uri.media_id) ) @@ -5333,6 +5377,7 @@ def test_copy_remote_media_with_sha256_path(self) -> None: # Check if the file is saved in the sha256 path only. assert isinstance(self.repo, MediaRepository) assert os.path.exists(self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256)) + # It should NOT exist in either of the legacy paths assert not os.path.exists( self.repo.filepaths.remote_media_filepath( remote_server, remote_media.filesystem_id @@ -5373,9 +5418,8 @@ def test_copy_local_media_with_media_id(self) -> None: # The copy should create a new media id, and recreate the file in the sha256 path. assert isinstance(self.repo, MediaRepository) assert os.path.exists(self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256)) - assert os.path.exists( - self.repo.filepaths.local_media_filepath(media_id) - ) # Copy operation does not delete the original file with media_id path. + assert os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) + # Copy operation does not delete the original file with media_id path. assert not os.path.exists( self.repo.filepaths.local_media_filepath(copied_media_mxc_uri.media_id) ) @@ -5435,13 +5479,14 @@ def test_upload_local_media_with_sha256_path(self) -> None: _, media_id = res.rsplit("/", maxsplit=1) # Check if the media is saved in the media table. - media = self.get_success(self.repo.store.get_local_media(media_id)) - assert media is not None - assert media.sha256 == SMALL_PNG_SHA256 + local_media_info = self.get_success(self.repo.store.get_local_media(media_id)) + assert local_media_info is not None + assert local_media_info.sha256 == SMALL_PNG_SHA256 # Check if the file is saved in the sha256 path. assert isinstance(self.repo, MediaRepository) assert os.path.exists(self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256)) + # It should not be created on the legacy path assert not os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) def test_download_local_thumbnail_with_media_id(self) -> None: @@ -5459,17 +5504,6 @@ def test_download_local_thumbnail_with_media_id(self) -> None: assert channel.code == 200, channel.result assert channel.result["body"] is not None - # Check if the thumbnail is saved in the media_id path, not in the sha256 path. 
- assert isinstance(self.repo, MediaRepository) - assert os.path.exists( - self.repo.filepaths.local_media_thumbnail( - media_id, 1, 1, "image/png", "scale" - ) - ) - assert not os.path.exists( - self.repo.filepaths.thumbnail_sha(media_id, 1, 1, "image/png", "scale") - ) - def test_download_local_thumbnail_with_sha256_path(self) -> None: """Test downloading thumbnail of local media with sha256 path.""" # Create local media with sha256 path.
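As a compact reading of the lookup order that `resolve_path_to_media_file()` (added in the media_storage.py hunks above) implements: an existing sha256-based file wins, then an existing media_id-based file, then whichever style the configuration prefers. The sketch below is a simplified illustration with assumed argument names, not the real method, which works on `FileInfo` objects and relative paths:

```python
import os
from typing import Optional


def pick_media_path(
    sha256_path: Optional[str],
    media_id_path: str,
    prefer_sha256: bool,
) -> str:
    """Illustrative precedence rule mirroring resolve_path_to_media_file().

    1. An existing sha256-based file is served if present.
    2. Otherwise an existing media_id-based file is served.
    3. Otherwise fall back to whichever style the configuration prefers,
       so a server that has toggled de-duplication on or off keeps working.
    """
    if sha256_path is not None and os.path.exists(sha256_path):
        return sha256_path
    if os.path.exists(media_id_path):
        return media_id_path
    if prefer_sha256 and sha256_path is not None:
        return sha256_path
    return media_id_path
```

This is the behaviour the `test_fetch_media_from_local_media_cache_fallback` test above relies on when it writes the file to the opposite path from the configured preference.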