From 7303392b0434f354071682e4f614ddbb2c7bb303 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Mon, 10 Nov 2025 11:39:35 -0600 Subject: [PATCH 1/3] some refinements deletes adjust some testing moved this test case to a place that supports a thread pool, and broke it into several testable components remove unused stage_into_file() function Apply feedback about resolve_path_to_media_file() Lose this TODO and do not recreate the IOBytes object Put back this new line, avoid merge conflicts Abstract actual duplicate media deletion into it's own subroutine Clean up counting how many references there are to a given media object based on sha256 Update test to clarify some comments about behavior Fix copypasta comment without update lint --- synapse/media/media_repository.py | 181 ++++++++--------- synapse/media/media_storage.py | 173 +++++++++++++--- synapse/media/thumbnailer.py | 10 +- .../databases/main/media_repository.py | 74 +++++-- tests/media/test_media_repository.py | 127 +++++++++--- tests/media/test_media_storage.py | 188 ++++++++++++------ tests/rest/client/test_media.py | 140 ++++++++----- 7 files changed, 598 insertions(+), 295 deletions(-) diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index f1566616e9..205b5a9ef5 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -20,7 +20,6 @@ # # import errno -import io import logging import os import shutil @@ -168,7 +167,6 @@ async def create_or_update_content( auth_user: UserID, media_id: Optional[str] = None, restricted: bool = False, - sha256: Optional[str] = None, ) -> MXCUri: raise NotImplementedError( "Sorry Mario, your MediaRepository related function is in another castle" @@ -764,7 +762,6 @@ async def create_or_update_content( auth_user: UserID, media_id: Optional[str] = None, restricted: bool = False, - sha256: Optional[str] = None, ) -> MXCUri: """Create or update the content of the given media ID. @@ -786,17 +783,14 @@ async def create_or_update_content( if media_id is None: media_id = random_string(24) - # if self.use_sha256_path and media_id and not sha256: - # sha256 = await self.store.get_sha_by_media_id(media_id) - - file_info = FileInfo(server_name=None, file_id=media_id, sha256=sha256) - sha256reader = SHA256TransparentIOReader(content) - data = sha256reader.read() + # Do a read() before a hexdigest(), or the hash will not be of the complete file + sha256reader.read() sha256 = sha256reader.hexdigest() + file_info = FileInfo(server_name=None, file_id=media_id, sha256=sha256) - # This implements all of IO as it has a passthrough - fname = await self.media_storage.store_file(io.BytesIO(data), file_info) + # store_file() has an inner function that does a seek(0) on the IO + fname = await self.media_storage.store_file(sha256reader.wrap(), file_info) should_quarantine = await self.store.get_is_hash_quarantined(sha256) logger.info("Stored local media in file %r", fname) @@ -806,17 +800,6 @@ async def create_or_update_content( "Media has been automatically quarantined as it matched existing quarantined media" ) - # If we enabled sha256 paths, we create a new file with sha256 path, and delete - # the original media_id path. - if self.use_sha256_path: - if not os.path.exists(self.filepaths.filepath_sha(sha256)): - file_info = FileInfo(server_name=None, file_id=media_id, sha256=sha256) - sha256_fname = await self.media_storage.store_file(content, file_info) - logger.info("Stored media in file %r", sha256_fname) - os.remove(fname) - else: - logger.info("File already exists in sha256 path.") - # Check that the user has not exceeded any of the media upload limits. # This is the total size of media uploaded by the user in the last @@ -873,9 +856,9 @@ async def create_or_update_content( await self._generate_thumbnails( server_name=None, media_id=media_id, - file_id=sha256 if self.use_sha256_path else media_id, + file_id=media_id, media_type=media_type, - sha256=sha256 if self.use_sha256_path else None, + sha256=sha256, ) except Exception as e: logger.info("Failed to generate thumbnails: %s", e) @@ -928,7 +911,6 @@ async def copy_media( old_media_info.media_length, auth_user, restricted=True, - sha256=old_media_info.sha256, ) # I could not find a place this was close()'d explicitly, but this felt prudent io_object.close() @@ -1301,6 +1283,8 @@ async def _get_remote_media_impl( # Failed to find the file anywhere, lets download it. try: + # Both of these retrieval functions will stage the file, so after they + # complete the migration function should be called below if not use_federation_endpoint: media_info = await self._download_remote_file( server_name, @@ -1342,32 +1326,26 @@ async def _get_remote_media_impl( if not media_info.media_type: media_info = attr.evolve(media_info, media_type="application/octet-stream") + # Now that the file has been retrieved, need to see if it needs to be migrated. + # Should probably do this before generating thumbnails file_info = FileInfo( - server_name=server_name, - file_id=media_info.sha256 - if self.use_sha256_path and media_info.sha256 - else file_id, - sha256=media_info.sha256, + server_name=server_name, file_id=file_id, sha256=media_info.sha256 ) + await self.media_storage.maybe_move_media_file_from_id_to_sha256_path(file_info) + # We generate thumbnails even if another process downloaded the media # as a) it's conceivable that the other download request dies before it # generates thumbnails, but mainly b) we want to be sure the thumbnails # have finished being generated before responding to the client, # otherwise they'll request thumbnails and get a 404 if they're not # ready yet. - if self.use_sha256_path: - assert media_info.sha256 is not None - await self._generate_thumbnails( - server_name=server_name, - media_id=media_id, - file_id=media_info.sha256, # Passing over sha256 for file_id if sha256 path is enabled. - media_type=media_info.media_type, - sha256=media_info.sha256, - ) - else: - await self._generate_thumbnails( - server_name, media_id, file_id, media_info.media_type - ) + await self._generate_thumbnails( + server_name=server_name, + media_id=media_id, + file_id=file_id, + media_type=media_info.media_type, + sha256=media_info.sha256, + ) responder = await self.media_storage.fetch_media(file_info) return responder, media_info @@ -1474,7 +1452,7 @@ async def _download_remote_file( time_now_ms=time_now_ms, upload_name=upload_name, media_length=length, - filesystem_id=sha256 if self.use_sha256_path else file_id, + filesystem_id=file_id, sha256=sha256, ) @@ -1485,14 +1463,6 @@ async def _download_remote_file( else: authenticated = False - # If sha256 paths are enabled, rename the file to sha256 path and delete the original media_id path. - if self.use_sha256_path: - rel_path = self.filepaths.filepath_sha(sha256) - abs_path = os.path.join(self.hs.config.media.media_store_path, rel_path) - os.makedirs(os.path.dirname(abs_path), exist_ok=True) - os.rename(fname, abs_path) - os.remove(fname) - return RemoteMedia( media_origin=server_name, media_id=media_id, @@ -1500,7 +1470,7 @@ async def _download_remote_file( media_length=length, upload_name=upload_name, created_ts=time_now_ms, - filesystem_id=sha256 if self.use_sha256_path else file_id, + filesystem_id=file_id, last_access_ts=time_now_ms, quarantined_by=None, authenticated=authenticated, @@ -1642,7 +1612,7 @@ async def _federation_download_remote_file( time_now_ms=time_now_ms, upload_name=upload_name, media_length=length, - filesystem_id=sha256 if self.use_sha256_path else file_id, + filesystem_id=file_id, sha256=sha256, restricted=restricted, ) @@ -1668,14 +1638,6 @@ async def _federation_download_remote_file( else: authenticated = False - # If sha256 paths are enabled, rename the file to sha256 path and delete the original media_id path. - if self.use_sha256_path: - rel_path = self.filepaths.filepath_sha(sha256) - abs_path = os.path.join(self.hs.config.media.media_store_path, rel_path) - os.makedirs(os.path.dirname(abs_path), exist_ok=True) - os.rename(fname, abs_path) - os.remove(fname) - return RemoteMedia( media_origin=server_name, media_id=media_id, @@ -1683,7 +1645,7 @@ async def _federation_download_remote_file( media_length=length, upload_name=upload_name, created_ts=time_now_ms, - filesystem_id=sha256 if self.use_sha256_path else file_id, + filesystem_id=file_id, last_access_ts=time_now_ms, quarantined_by=None, authenticated=authenticated, @@ -1931,7 +1893,7 @@ async def _generate_thumbnails( file_info = FileInfo( server_name, - file_id, # This will be the sha256 if sha256 path is enabled. Otherwise, it will be the file_id. + file_id, url_cache=url_cache, sha256=sha256, ) @@ -2014,9 +1976,7 @@ async def _generate_thumbnails( file_info = FileInfo( server_name=server_name, - file_id=sha256 - if self.use_sha256_path and sha256 - else file_id, # Saving the thumbnail with sha256 path if sha256 path is enabled. + file_id=file_id, url_cache=url_cache, thumbnail=ThumbnailInfo( width=t_width, @@ -2138,22 +2098,23 @@ async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]: logger.info("Deleting: %r", key) # TODO: Should we delete from the backup store + # Jason: Probably, we literally don't delete from it anywhere else async with self.remote_media_linearizer.queue(key): - full_path = self.filepaths.remote_media_filepath(origin, file_id) - try: - os.remove(full_path) - except OSError as e: - logger.warning("Failed to remove file: %r", full_path) - if e.errno == errno.ENOENT: - pass - else: - continue + paths = [self.filepaths.remote_media_filepath(origin, file_id)] + thumbnail_paths = [ + self.filepaths.remote_media_thumbnail_dir(origin, file_id) + ] - thumbnail_dir = self.filepaths.remote_media_thumbnail_dir( - origin, file_id - ) - shutil.rmtree(thumbnail_dir, ignore_errors=True) + sha256 = await self.store.get_sha_for_remote_media_id(origin, media_id) + # In the interest of removing media that was de-duplicated, only purge + # the actual media object if there are no more references to it in the + # database. + if await self.store.get_media_reference_count_for_sha256(sha256) <= 1: + paths.append(self.filepaths.filepath_sha(sha256)) + thumbnail_paths.append(self.filepaths.thumbnail_sha_dir(sha256)) + + _remove_media_files(paths, thumbnail_paths) await self.store.delete_remote_media(origin, media_id) deleted += 1 @@ -2164,10 +2125,10 @@ async def delete_local_media_ids( self, media_ids: List[str] ) -> Tuple[List[str], int]: """ - Delete the given local or remote media ID from this server + Delete the given local media ID from this server Args: - media_id: The media ID to delete. + media_ids: The list of media IDs to delete. Returns: A tuple of (list of deleted media IDs, total deleted media IDs). """ @@ -2182,7 +2143,7 @@ async def delete_old_local_media( delete_protected_media: bool = False, ) -> Tuple[List[str], int]: """ - Delete local or remote media from this server by size and timestamp. Removes + Delete local media from this server by size and timestamp. Removes media files, any thumbnails and cached URLs. Args: @@ -2210,7 +2171,7 @@ async def _remove_local_media_from_disk( self, media_ids: List[str] ) -> Tuple[List[str], int]: """ - Delete local or remote media from this server. Removes media files, + Delete local media from this server. Removes media files, any thumbnails and cached URLs. Args: @@ -2221,31 +2182,53 @@ async def _remove_local_media_from_disk( removed_media = [] for media_id in media_ids: logger.info("Deleting media with ID '%s'", media_id) - sha256 = await self.store.get_sha_by_media_id(media_id, None) paths = [ self.filepaths.local_media_filepath(media_id), - self.filepaths.filepath_sha(sha256), ] - for path in paths: - try: - os.remove(path) - except OSError as e: - logger.warning("Failed to remove file: %r: %s", path, e) - if e.errno == errno.ENOENT: - pass - else: - continue + thumbnail_paths = [self.filepaths.local_media_thumbnail_dir(media_id)] + + sha256 = await self.store.get_sha_for_local_media_id(media_id) + # In the interest of removing media that was de-duplicated, only purge + # the actual media object if there are no more references to it in the + # database. + if await self.store.get_media_reference_count_for_sha256(sha256) <= 1: + paths.append(self.filepaths.filepath_sha(sha256)) + thumbnail_paths.append(self.filepaths.thumbnail_sha_dir(sha256)) - thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id) - thumbnail_dir_sha = self.filepaths.thumbnail_sha_dir(sha256) - shutil.rmtree(thumbnail_dir, ignore_errors=True) - shutil.rmtree(thumbnail_dir_sha, ignore_errors=True) + _remove_media_files(paths, thumbnail_paths) + # Poorly named, this deletes media *from this server* that is somehow + # considered "remote". Observe the server name argument below await self.store.delete_remote_media(self.server_name, media_id) await self.store.delete_url_cache((media_id,)) + # Poorly named, but this is the one that cleans out local media metadata await self.store.delete_url_cache_media((media_id,)) removed_media.append(media_id) return removed_media, len(removed_media) + + +def _remove_media_files(paths: List[str], thumbnail_paths: List[str]) -> None: + """ + Remove the select files from the filesystem. IO related errors, such as non-existent + files, will be ignored. + + Args: + paths: The absolute path to a specific file to be removed + thumbnail_paths: The absolute path to a specific directory. The entire directory + will be removed. + """ + for path in paths: + try: + os.remove(path) + except OSError as e: + logger.warning("Failed to remove file: %r: %s", path, e) + if e.errno == errno.ENOENT: + pass + else: + continue + + for thumbnail_path in thumbnail_paths: + shutil.rmtree(thumbnail_path, ignore_errors=True) diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py index c67d3f72d1..8678ec2259 100644 --- a/synapse/media/media_storage.py +++ b/synapse/media/media_storage.py @@ -226,7 +226,7 @@ async def store_into_file( # .. write into f ... """ # With `use_sha256_path` enabled, new files are stored with SHA256 paths. - path = self._file_info_to_path(file_info) + path = self.resolve_path_to_media_file(file_info) fname = os.path.join(self.local_media_directory, path) dirname = os.path.dirname(fname) os.makedirs(dirname, exist_ok=True) @@ -251,9 +251,12 @@ async def store_into_file( # the spam-check API. raise SpamMediaException(errcode=spam_check[0]) + # Unlike the local cache, the storage providers are on their own for + # de-duplicating media. They get a relative path to work with + relative_legacy_path = self._file_info_to_path(file_info) for provider in self.storage_providers: with start_active_span(str(provider)): - await provider.store_file(path, file_info) + await provider.store_file(relative_legacy_path, file_info) except Exception as e: try: @@ -273,10 +276,7 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: Returns: Returns a Responder if the file was found, otherwise None. """ - paths = [self._file_info_to_path(file_info, force_sha256=False)] - - if self.use_sha256_path: - paths.append(self._file_info_to_path(file_info)) + paths = [self.resolve_path_to_media_file(file_info)] # fallback for remote thumbnails with no method in the filename if file_info.thumbnail and file_info.server_name: @@ -297,6 +297,10 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: return FileResponder(self.hs, open(local_path, "rb")) logger.debug("local file %s did not exist", local_path) + # Unlike the local cache, the storage providers are on their own for + # de-duplicating media. They get a relative path to work with + relative_legacy_path = self._file_info_to_path(file_info) + paths = [relative_legacy_path] for provider in self.storage_providers: for path in paths: res: Any = await provider.fetch(path, file_info) @@ -318,14 +322,7 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: Returns: Full path to local file """ - # When `use_sha256_path` is enabled, we first check for the SHA256 path. - if self.use_sha256_path and not file_info.url_cache: - path = self._file_info_to_path(file_info) - local_path = os.path.join(self.local_media_directory, path) - if os.path.exists(local_path): - return local_path - - path = self._file_info_to_path(file_info, force_sha256=False) + path = self.resolve_path_to_media_file(file_info) local_path = os.path.join(self.local_media_directory, path) if os.path.exists(local_path): return local_path @@ -347,8 +344,11 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: dirname = os.path.dirname(local_path) os.makedirs(dirname, exist_ok=True) + # Unlike the local cache, the storage providers are on their own for + # de-duplicating media. They get a relative path to work with + relative_legacy_path = self._file_info_to_path(file_info) for provider in self.storage_providers: - res: Any = await provider.fetch(path, file_info) + res: Any = await provider.fetch(relative_legacy_path, file_info) if res: with res: consumer = BackgroundFileConsumer( @@ -361,9 +361,7 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: raise NotFoundError() @trace - def _file_info_to_path( - self, file_info: FileInfo, force_sha256: Optional[bool] = None - ) -> str: + def _file_info_to_path(self, file_info: FileInfo) -> str: """ Converts file_info to a relative path. @@ -372,22 +370,7 @@ def _file_info_to_path( Args: file_info: File information - force_sha256: If True, force SHA256 path; if False, force media_id path, - if None, use self.use_sha256_path flag. """ - use_sha256 = force_sha256 if force_sha256 is not None else self.use_sha256_path - if use_sha256 and file_info.sha256 and not file_info.url_cache: - if file_info.thumbnail: - return self.filepaths.thumbnail_sha_rel( - sha256=file_info.sha256, - width=file_info.thumbnail.width, - height=file_info.thumbnail.height, - content_type=file_info.thumbnail.type, - method=file_info.thumbnail.method, - ) - return self.filepaths.filepath_sha_rel(file_info.sha256) - - # Fall back to media_id-based paths if file_info.url_cache: if file_info.thumbnail: return self.filepaths.url_cache_thumbnail_rel( @@ -423,6 +406,130 @@ def _file_info_to_path( ) return self.filepaths.local_media_filepath_rel(file_info.file_id) + def _file_info_to_sha256_path(self, file_info: FileInfo) -> str: + """ + Resolve the path based on the sha256 hash. This does not work for the url cache. + + Returns: + The relative path to where the file should be + """ + # There should not be a case where a url preview should be cached in sha256 + assert not file_info.url_cache, "Can not use sha256 paths with url caching" + + assert file_info.sha256 is not None, ( + "Can not resolve a path to a sha256 object without a hash" + ) + if file_info.thumbnail: + return self.filepaths.thumbnail_sha_rel( + sha256=file_info.sha256, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, + ) + return self.filepaths.filepath_sha_rel(file_info.sha256) + + def resolve_path_to_media_file(self, file_info: FileInfo) -> str: + """ + Fully resolve the filesystem path to the media object. Allow for backwards + compatibility with existing media_id-based file names. + + Arguments: + file_info: The FileInfo object that contains the data for looking up the + file path + Returns: + The fully resolved, relative path to the file request. + """ + + media_id_path = self._file_info_to_path(file_info) + + if not file_info.sha256 or file_info.url_cache: + return media_id_path + sha256_path = self._file_info_to_sha256_path(file_info) + + media_id_abs_path = os.path.join(self.local_media_directory, media_id_path) + media_id_file_exists = os.path.exists(media_id_abs_path) + + sha256_abs_path = os.path.join(self.local_media_directory, sha256_path) + sha256_file_exists = os.path.exists(sha256_abs_path) + + logger.debug( + "Verifying media file exists at path %s: %r", + media_id_abs_path, + media_id_file_exists, + ) + logger.debug( + "Verifying media file exists at sha256 path %s: %r", + sha256_abs_path, + sha256_file_exists, + ) + + # When `use_sha256_path` is not enabled, but might have been in the past, use + # that file if it already exists. Perhaps the admin changed their mind? + if sha256_file_exists: + return sha256_path + if media_id_file_exists: + return media_id_path + return sha256_path if self.use_sha256_path else media_id_path + + async def maybe_move_media_file_from_id_to_sha256_path( + self, file_info: FileInfo + ) -> None: + """ + Move a media file from a legacy "media_id" based path to a "sha256" based path. + This is most appropriate after using `store_into_file()` for federation + downloaded media. + """ + # Check the old media id path for the file, then check the sha256 path. + # If media exists on the old path, and use_sha256_path is enabled, move + # the file. Migration is one way + + if not self.use_sha256_path: + logger.debug("Can not move media file if de-deduplication turned off") + return + if not file_info.sha256: + logger.warning( + "Sha256 data on media/file id '%s' is missing. Skipping file move", + file_info.file_id, + ) + return + + media_id_path = self._file_info_to_path(file_info) + media_id_abs_path = os.path.join(self.local_media_directory, media_id_path) + media_id_file_exists = os.path.exists(media_id_abs_path) + logger.debug( + "Verifying media file exists at path %s: %r", + media_id_abs_path, + media_id_file_exists, + ) + + sha256_path = self._file_info_to_sha256_path(file_info) + sha256_abs_path = os.path.join(self.local_media_directory, sha256_path) + sha256_file_exists = os.path.exists(sha256_abs_path) + logger.debug( + "Verifying media file exists at sha256 path %s: %r", + sha256_abs_path, + sha256_file_exists, + ) + + if media_id_file_exists and not sha256_file_exists: + os.makedirs(os.path.dirname(sha256_abs_path), exist_ok=True) + # os.rename() should be an atomic operation + os.rename(media_id_abs_path, sha256_abs_path) + logger.debug( + "Moved file from old media id path to new sha256 path: %s", + sha256_abs_path, + ) + + # Having `use_sha256_path` enabled is our signal that the server prefers using + # the newer path style + if media_id_file_exists and sha256_file_exists: + # Probably should clean up the old duplicate? Do we want a feature flag? + os.remove(media_id_abs_path) + logger.debug( + "Both paths existed, deleting old media id path: %s", media_id_abs_path + ) + @trace def _write_file_synchronously(source: IO, dest: IO) -> None: diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py index 914d096355..35c84834b4 100644 --- a/synapse/media/thumbnailer.py +++ b/synapse/media/thumbnailer.py @@ -392,6 +392,7 @@ async def select_or_generate_local_thumbnail( file_id=media_id, url_cache=bool(media_info.url_cache), thumbnail=info, + sha256=media_info.sha256, ) responder = await self.media_storage.fetch_media(file_info) @@ -507,6 +508,7 @@ async def select_or_generate_remote_thumbnail( server_name=server_name, file_id=file_id, thumbnail=info, + sha256=media_info.sha256, ) responder = await self.media_storage.fetch_media(file_info) @@ -527,7 +529,7 @@ async def select_or_generate_remote_thumbnail( desired_height, desired_method, desired_type, - media_info.sha256 if self.use_sha256_path else None, + media_info.sha256, ) if file_path: @@ -596,9 +598,7 @@ async def respond_remote_thumbnail( url_cache=False, server_name=server_name, for_federation=False, - sha256=media_info.sha256 - if self.use_sha256_path and media_info.sha256 - else None, + sha256=media_info.sha256, ) async def _select_and_respond_with_thumbnail( @@ -658,7 +658,7 @@ async def _select_and_respond_with_thumbnail( file_id, url_cache, server_name, - sha256=sha256 if self.use_sha256_path and sha256 else None, + sha256=sha256, ) if not file_info: logger.info("Couldn't find a thumbnail matching the desired inputs") diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index 22fe8622c7..6d794dec98 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -1309,8 +1309,9 @@ def set_media_restricted_to_user_profile_txn( json_dict=json_object, ) - async def get_sha_by_media_id( - self, media_id: str, server_name: Optional[str] = None + async def get_sha_for_local_media_id( + self, + media_id: str, ) -> str: """ Get the media ID of and return SHA256 hash of given media. @@ -1322,22 +1323,39 @@ async def get_sha_by_media_id( LIMIT 1 """ - if server_name and not self.hs.is_mine_server_name(server_name): - sql = """ - SELECT sha256 - FROM remote_media_cache - WHERE media_origin = ? AND media_id = ? - ORDER BY created_ts ASC - LIMIT 1 - """ + def _get_sha_for_local_media_id_txn( + txn: LoggingTransaction, + ) -> str: + txn.execute(sql, (media_id,)) + + rows = txn.fetchone() + if not rows: + raise ValueError("media_id %s has no sha256 field", media_id) + return rows[0] + + return await self.db_pool.runInteraction( + "get_sha_for_local_media_id", _get_sha_for_local_media_id_txn + ) + + async def get_sha_for_remote_media_id(self, server_name: str, media_id: str) -> str: + """ + Get the media ID of and return SHA256 hash of given media. + """ + assert not self.hs.is_mine_server_name(server_name), ( + "Can not run against own server" + ) + sql = """ + SELECT sha256 + FROM remote_media_cache + WHERE media_origin = ? AND media_id = ? + ORDER BY created_ts ASC + LIMIT 1 + """ - def _get_sha_by_media_id_txn( + def _get_sha_for_remote_media_id_txn( txn: LoggingTransaction, ) -> str: - if server_name and not self.hs.is_mine_server_name(server_name): - txn.execute(sql, (server_name, media_id)) - else: - txn.execute(sql, (media_id,)) + txn.execute(sql, (server_name, media_id)) rows = txn.fetchone() if not rows: @@ -1345,5 +1363,29 @@ def _get_sha_by_media_id_txn( return rows[0] return await self.db_pool.runInteraction( - "get_sha_by_media_id", _get_sha_by_media_id_txn + "get_sha_for_remote_media_id", _get_sha_for_remote_media_id_txn + ) + + async def get_media_reference_count_for_sha256(self, sha256: str) -> int: + """Get total number of local + remote media entries for this SHA256.""" + + def _get_reference_count_txn(txn: LoggingTransaction) -> int: + sql = """ + SELECT SUM(total_rows) AS grand_total_rows + FROM ( + SELECT COUNT(*) AS total_rows FROM local_media_repository WHERE sha256 = ? + UNION ALL + SELECT COUNT(*) AS total_rows FROM remote_media_cache WHERE sha256 = ? + ) AS combined_counts + """ + + txn.execute(sql, (sha256, sha256)) + row_tuple = txn.fetchone() + if row_tuple is not None: + (row,) = row_tuple + return row + return 0 + + return await self.db_pool.runInteraction( + "get_media_reference_count_for_sha256", _get_reference_count_txn ) diff --git a/tests/media/test_media_repository.py b/tests/media/test_media_repository.py index 996e521af1..f5610fe521 100644 --- a/tests/media/test_media_repository.py +++ b/tests/media/test_media_repository.py @@ -76,47 +76,107 @@ def _store_media_with_path(self, file_info: FileInfo, expected_path: str) -> Non assert os.path.exists(fname), f"File does not exist: {fname}" def _create_local_media_with_media_id_path(self, user: str) -> MXCUri: + """ + Force creation of a media object at the specific place based on the media id + path. This does assert that the file exists where it is expected to be + """ assert isinstance(self.repo, MediaRepository) - self.repo.use_sha256_path = False - mxc_uri = self.get_success( - self.repo.create_or_update_content( + media_id = random_string(24) + # Curate a specialized FileInfo that is lacking sha256 data, then file will be + # forced to the old media_id path + file_info = FileInfo( + server_name=None, + file_id=media_id, + ) + + expected_path = self.repo.filepaths.local_media_filepath(media_id) + ctx = self.repo.media_storage.store_into_file(file_info) + (f, fname) = self.get_success(ctx.__aenter__()) + f.write(SMALL_PNG) + self.get_success(ctx.__aexit__(None, None, None)) + + # Insert the appropriate data into the database, so lookups work as expected + self.get_success( + self.repo.store.store_local_media( + media_id=media_id, media_type="image/png", + time_now_ms=self.clock.time_msec(), upload_name="original_media", - content=io.BytesIO(SMALL_PNG), - content_length=67, - auth_user=UserID.from_string(user), - media_id=None, - restricted=True, + media_length=67, + user_id=UserID.from_string(user), + sha256=SMALL_PNG_SHA256, + quarantined_by=None, + restricted=False, ) ) - media_id = mxc_uri.media_id + assert expected_path == fname + assert os.path.exists(fname), f"File does not exist: {fname}" + assert os.path.exists(expected_path), f"File does not exist: {expected_path}" + + self.get_success( + self.repo._generate_thumbnails( + server_name=None, + media_id=media_id, + file_id=media_id, + media_type="image/png", + # We purposely do not use the sha256 here, as it directly causes the sha256 + # path for thumbnails to be populated, and that is not what we are looking + # for. + sha256=None, + ) + ) + return MXCUri(server_name=self.hs.config.server.server_name, media_id=media_id) + + def _create_local_media_with_sha256_path(self, user: str) -> MXCUri: + assert isinstance(self.repo, MediaRepository) + media_id = random_string(24) + # Curate a specialized FileInfo that includes sha256 data, then file will be + # forced to the new sha256 path file_info = FileInfo( server_name=None, file_id=media_id, + sha256=SMALL_PNG_SHA256, ) - assert isinstance(self.repo, MediaRepository) - expected_path = self.repo.filepaths.local_media_filepath(media_id) - self._store_media_with_path(file_info, expected_path) - assert os.path.exists(expected_path), f"File does not exist: {expected_path}" - self.repo.use_sha256_path = True - return mxc_uri - def _create_local_media_with_sha256_path(self, user: str) -> MXCUri: - expected_path = os.path.join("media", SMALL_PNG_SHA256_PATH) - mxc_uri = self.get_success( - self.repo.create_or_update_content( + expected_path = self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256) + ctx = self.repo.media_storage.store_into_file(file_info) + (f, fname) = self.get_success(ctx.__aenter__()) + f.write(SMALL_PNG) + self.get_success(ctx.__aexit__(None, None, None)) + + # Insert the appropriate data into the database, so lookups work as expected + self.get_success( + self.repo.store.store_local_media( + media_id=media_id, media_type="image/png", - upload_name="test_png_upload", - content=io.BytesIO(SMALL_PNG), - content_length=67, - auth_user=UserID.from_string(user), - media_id=None, - restricted=True, + time_now_ms=self.clock.time_msec(), + upload_name="original_media", + media_length=67, + user_id=UserID.from_string(user), sha256=SMALL_PNG_SHA256, + quarantined_by=None, + restricted=False, ) ) + assert expected_path == fname + assert os.path.exists(fname), f"File does not exist: {fname}" assert os.path.exists(expected_path), f"File does not exist: {expected_path}" - return mxc_uri + + # Some tests expect thumbnails, remember to generate them + self.get_success( + self.repo._generate_thumbnails( + server_name=None, + media_id=media_id, + file_id=media_id, + media_type="image/png", + # We purposely do not use the sha256 here, as it directly causes the sha256 + # path for thumbnails to be populated, and that is not what we are looking + # for. + sha256=SMALL_PNG_SHA256, + ) + ) + + return MXCUri(server_name=self.hs.config.server.server_name, media_id=media_id) def _create_remote_media_with_media_id_path(self, server_name: str) -> str: media_id = random_string(24) @@ -213,6 +273,10 @@ def test_create_or_update_content_creates_new_content_with_sha256_path( def test_create_or_update_content_updates_content_with_media_id_path(self) -> None: """Test that `create_or_update_content` function can update existing media with media_id path""" + # Strictly speaking, this is not an operation that is supposed to be supported, + # but is currently possible. In the case it becomes necessary, the behavior + # should not be unexpected. + # Create media with media_id path mxc_uri = self._create_local_media_with_media_id_path(self.creator) media_id = mxc_uri.media_id @@ -230,13 +294,15 @@ def test_create_or_update_content_updates_content_with_media_id_path(self) -> No ) ) assert mxc_uri.media_id == updated_mxc_uri.media_id - assert os.path.exists(os.path.join("media", SMALL_PNG_SHA256_PATH)) - # TODO: update removes the file with media_id path, and replace it with the new file with sha256 path. assert isinstance(self.repo, MediaRepository) - assert not os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) + assert os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) def test_create_or_update_content_updates_content_with_sha256_path(self) -> None: """Test that `create_or_update_content` function can update existing media with sha256 path""" + # Strictly speaking, this is not an operation that is supposed to be supported, + # but is currently possible. In the case it becomes necessary, the behavior + # should not be unexpected. + # Create media with sha256 path mxc_uri = self._create_local_media_with_sha256_path(self.creator) media_id = mxc_uri.media_id @@ -251,7 +317,6 @@ def test_create_or_update_content_updates_content_with_sha256_path(self) -> None auth_user=UserID.from_string(self.creator), media_id=media_id, restricted=True, - sha256=SMALL_PNG_SHA256, ) ) assert mxc_uri.media_id == updated_mxc_uri.media_id @@ -331,7 +396,7 @@ def test_get_local_media_with_sha256_path(self) -> None: def test_get_remote_media_impl_with_sha256_path_cache_hit(self) -> None: server_name = "other_server.com" - # Generate remote media with media_id path + # Generate remote media with sha256 path media_id = self._create_remote_media_with_sha256_path(server_name) assert isinstance(self.repo, MediaRepository) diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py index 42504068bd..bffcd7b1b6 100644 --- a/tests/media/test_media_storage.py +++ b/tests/media/test_media_storage.py @@ -18,6 +18,7 @@ # [This file includes modifications made by New Vector Limited] # # +import hashlib import os import shutil import tempfile @@ -66,10 +67,15 @@ from tests.utils import default_config +@parameterized_class(("use_dedupe",), [(True,), (False,)]) class MediaStorageTests(unittest.HomeserverTestCase): + use_dedupe: bool needs_threadpool = True def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + """ + "enable_local_media_storage_deduplication": False, + """ self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-") self.addCleanup(shutil.rmtree, self.test_dir) @@ -77,6 +83,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.secondary_base_path = os.path.join(self.test_dir, "secondary") hs.config.media.media_store_path = self.primary_base_path + hs.config.media.enable_local_media_storage_deduplication = self.use_dedupe storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)] @@ -85,24 +92,73 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: hs, self.primary_base_path, self.filepaths, storage_providers ) - def test_ensure_media_is_in_local_cache(self) -> None: - media_id = "some_media_id" - test_body = "Test\n" + self.media_id = "some_media_id" + self.test_body = b"Test\n" + self.sha256 = hashlib.sha256(self.test_body).hexdigest() + self.file_info = FileInfo(None, self.media_id, sha256=self.sha256) - # First we create a file that is in a storage provider but not in the - # local primary media store - rel_path = self.filepaths.local_media_filepath_rel(media_id) + self.expected_relative_media_id_path_fragment = ( + self.filepaths.local_media_filepath_rel(self.media_id) + ) + self.expected_relative_sha256_path_fragment = self.filepaths.filepath_sha_rel( + self.sha256 + ) + + def write_media_to_storage_provider(self) -> str: + """Write a media file to the configured storage provider""" + # Storage providers only get to work with the provided relative directory + rel_path = self.expected_relative_media_id_path_fragment secondary_path = os.path.join(self.secondary_base_path, rel_path) os.makedirs(os.path.dirname(secondary_path)) - with open(secondary_path, "w") as f: - f.write(test_body) + with open(secondary_path, "wb") as f: + f.write(self.test_body) - # Now we run ensure_media_is_in_local_cache, which should copy the file - # to the local cache. - file_info = FileInfo(None, media_id) + return secondary_path + + def write_media_to_local_media_cache(self) -> str: + """Write a media file to the local media cache directory""" + if self.use_dedupe: + rel_path = self.expected_relative_sha256_path_fragment + else: + rel_path = self.expected_relative_media_id_path_fragment + primary_path = os.path.join(self.primary_base_path, rel_path) + + os.makedirs(os.path.dirname(primary_path)) + with open(primary_path, "wb") as f: + f.write(self.test_body) + + return primary_path + + def write_media_to_local_media_cache_fallback(self) -> str: + """ + Unlike write_media_to_local_media_cache() above, this writes it to the opposite + path that was expected. So if de-duplication is not enabled, it is written to + the media ID path instead. This allows testing of flipping the switch + """ + if not self.use_dedupe: + rel_path = self.expected_relative_sha256_path_fragment + else: + rel_path = self.expected_relative_media_id_path_fragment + primary_path = os.path.join(self.primary_base_path, rel_path) + + os.makedirs(os.path.dirname(primary_path)) + + with open(primary_path, "wb") as f: + f.write(self.test_body) + + return primary_path + + def run_ensure_media_is_in_local_cache_to_completion( + self, file_info: FileInfo + ) -> str: + """ + ensure_media_is_in_local_cache() runs on a dedicated thread pool, which is + normally disabled during testing. This TestCase has it turned on, so care must + be taken to make sure it finishes. + """ # This uses a real blocking threadpool so we have to wait for it to be # actually done :/ x = defer.ensureDeferred( @@ -112,7 +168,22 @@ def test_ensure_media_is_in_local_cache(self) -> None: # Hotloop until the threadpool does its job... self.wait_on_thread(x) - local_path = self.get_success(x) + return self.get_success(x) + + def test_ensure_media_is_in_local_cache(self) -> None: + """ + Test ensure_media_is_in_local_cache() behavior when copying media to the local + cache + """ + # First we create a file that is in a storage provider but not in the + # local primary media store + self.write_media_to_storage_provider() + + # Now we run ensure_media_is_in_local_cache(), which should copy the file + # to the local cache. + local_path = self.run_ensure_media_is_in_local_cache_to_completion( + self.file_info + ) self.assertTrue(os.path.exists(local_path)) @@ -122,10 +193,55 @@ def test_ensure_media_is_in_local_cache(self) -> None: self.primary_base_path, ) - with open(local_path) as f: + with open(local_path, "rb") as f: body = f.read() - self.assertEqual(test_body, body) + self.assertEqual(self.test_body, body) + + def test_fetch_media_from_local_media_cache(self) -> None: + """Tests that fetch_media() can retrieve media from any local media cache path""" + # First we create a file that is in the local primary media store based on the + # path dictated by the de-duplication setting + self.write_media_to_local_media_cache() + # Now we run fetch_media(), which should return a responder with the file + responder = self.get_success(self.media_storage.fetch_media(self.file_info)) + assert responder is not None + assert isinstance(responder, FileResponder) + # Use the context manager, so it will close the responder for us + with responder: + responder.open_file.seek(0) + content = responder.open_file.read() + assert content == self.test_body + + def test_fetch_media_from_local_media_cache_fallback(self) -> None: + """Test that if de-duplication is turned on and off, the media is still usable""" + # First we create a file that is in the local primary media store based on the + # inverse of the path dictated by the de-duplication setting + self.write_media_to_local_media_cache_fallback() + # Now we run fetch_media(), which should return a responder with the file + responder = self.get_success(self.media_storage.fetch_media(self.file_info)) + assert responder is not None + assert isinstance(responder, FileResponder) + # Use the context manager, so it will close the responder for us + with responder: + responder.open_file.seek(0) + content = responder.open_file.read() + assert content == self.test_body + + def test_fetch_media_from_storage_provider(self) -> None: + """Test that media in the storage provider can be served back directly""" + # First we create a file that is in a storage provider but not in the + # local primary media store + self.write_media_to_storage_provider() + # Now we run fetch_media(), which should return a responder with the file + responder = self.get_success(self.media_storage.fetch_media(self.file_info)) + assert responder is not None + assert isinstance(responder, FileResponder) + # Use the context manager, so it will close the responder for us + with responder: + responder.open_file.seek(0) + content = responder.open_file.read() + assert content == self.test_body @attr.s(auto_attribs=True, slots=True, frozen=True) @@ -419,15 +535,6 @@ def _req( return channel - def _store_media_with_path(self, file_info: FileInfo, expected_path: str) -> None: - ctx = self.media_storage.store_into_file(file_info) - (f, fname) = self.get_success(ctx.__aenter__()) - f.write(SMALL_PNG) - self.get_success(ctx.__aexit__(None, None, None)) - - assert expected_path in fname - assert os.path.exists(fname), f"File does not exist: {fname}" - @unittest.override_config( { "enable_authenticated_media": False, @@ -864,41 +971,6 @@ def test_unknown_v3_endpoint(self) -> None: self.pump() self.assertEqual(channel.code, 200) - @unittest.override_config( - { - "enable_authenticated_media": False, - "enable_local_media_storage_deduplication": True, - } - ) - def test_ensure_media_storage_is_compatible_with_sha256_path(self) -> None: - """Test that `ensure_media_is_in_local_cache` and `fetch_media` works with sha256 path.""" - # Create local media with sha256 path. - expected_path = self.media_storage.filepaths.filepath_sha_rel( - sha256=SMALL_PNG_SHA256, - ) - file_info = FileInfo( - server_name="example.com", - file_id=SMALL_PNG_SHA256, - url_cache=False, - sha256=SMALL_PNG_SHA256, - ) - self._store_media_with_path(file_info, expected_path) - - # Check if `ensure_media_is_in_local_cache` can find the media. - local_path = self.get_success( - self.media_storage.ensure_media_is_in_local_cache(file_info) - ) - assert expected_path in local_path - assert os.path.exists(local_path), f"File does not exist: {local_path}" - - # Check if `fetch_media` can fetch the media. - responder = self.get_success(self.media_storage.fetch_media(file_info)) - assert responder is not None - assert isinstance(responder, FileResponder) - responder.open_file.seek(0) - content = responder.open_file.read() - assert content == SMALL_PNG - class TestSpamCheckerLegacy: """A spam checker module that rejects all media that includes the bytes diff --git a/tests/rest/client/test_media.py b/tests/rest/client/test_media.py index 12882b7081..b25861022a 100644 --- a/tests/rest/client/test_media.py +++ b/tests/rest/client/test_media.py @@ -5162,39 +5162,60 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: def _create_local_media_with_sha256_path(self) -> str: assert isinstance(self.repo, MediaRepository) + media_id = random_string(24) + # Curate a specialized FileInfo that includes sha256 data, then file will be + # forced to the new sha256 path + file_info = FileInfo( + server_name=None, + file_id=media_id, + sha256=SMALL_PNG_SHA256, + ) + expected_path = self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256) - mxc_uri = self.get_success( - self.repo.create_or_update_content( + ctx = self.repo.media_storage.store_into_file(file_info) + (f, fname) = self.get_success(ctx.__aenter__()) + f.write(SMALL_PNG) + self.get_success(ctx.__aexit__(None, None, None)) + + # Insert the appropriate data into the database, so lookups work as expected + self.get_success( + self.repo.store.store_local_media( + media_id=media_id, media_type="image/png", - upload_name="test_png_upload", - content=io.BytesIO(SMALL_PNG), - content_length=67, - auth_user=UserID.from_string(self.user), - media_id=None, - restricted=True, + time_now_ms=self.clock.time_msec(), + upload_name="original_media", + media_length=67, + user_id=UserID.from_string(self.user), + sha256=SMALL_PNG_SHA256, + quarantined_by=None, + restricted=False, ) ) + assert expected_path == fname + assert os.path.exists(fname), f"File does not exist: {fname}" assert os.path.exists(expected_path), f"File does not exist: {expected_path}" - assert not os.path.exists( - self.repo.filepaths.local_media_filepath(mxc_uri.media_id) - ) - return mxc_uri.media_id - def _create_local_media_with_media_id_path(self) -> str: - assert isinstance(self.repo, MediaRepository) - self.repo.use_sha256_path = False - mxc_uri = self.get_success( - self.repo.create_or_update_content( + # Some tests expect thumbnails, remember to generate them + self.get_success( + self.repo._generate_thumbnails( + server_name=None, + media_id=media_id, + file_id=media_id, media_type="image/png", - upload_name="original_media", - content=io.BytesIO(SMALL_PNG), - content_length=67, - auth_user=UserID.from_string(self.user), - media_id=None, - restricted=True, + # We purposely do not use the sha256 here, as it directly causes the sha256 + # path for thumbnails to be populated, and that is not what we are looking + # for. + sha256=SMALL_PNG_SHA256, ) ) - media_id = mxc_uri.media_id + + return media_id + + def _create_local_media_with_media_id_path(self) -> str: + assert isinstance(self.repo, MediaRepository) + media_id = random_string(24) + # Curate a specialized FileInfo that is lacking sha256 data, then file will be + # forced to the old media_id path file_info = FileInfo( server_name=None, file_id=media_id, @@ -5206,10 +5227,37 @@ def _create_local_media_with_media_id_path(self) -> str: f.write(SMALL_PNG) self.get_success(ctx.__aexit__(None, None, None)) + # Insert the appropriate data into the database, so lookups work as expected + self.get_success( + self.repo.store.store_local_media( + media_id=media_id, + media_type="image/png", + time_now_ms=self.clock.time_msec(), + upload_name="original_media", + media_length=67, + user_id=UserID.from_string(self.user), + sha256=SMALL_PNG_SHA256, + quarantined_by=None, + restricted=False, + ) + ) assert expected_path == fname assert os.path.exists(fname), f"File does not exist: {fname}" assert os.path.exists(expected_path), f"File does not exist: {expected_path}" - self.repo.use_sha256_path = True + + # Some tests expect thumbnails, remember to generate them + self.get_success( + self.repo._generate_thumbnails( + server_name=None, + media_id=media_id, + file_id=media_id, + media_type="image/png", + # We purposely do not use the sha256 here, as it directly causes the sha256 + # path for thumbnails to be populated, and that is not what we are looking + # for. + sha256=None, + ) + ) return media_id async def _mock_federation_download_media( @@ -5263,11 +5311,13 @@ def test_download_remote_media_saves_file_in_sha256_path(self) -> None: ) ) - def test_download_local_media_with_media_id_path_saves_file_in_sha256_path( + def test_download_local_media_with_media_id_path( self, ) -> None: - """Test that downloading local media with media_id path saves file in sha256 path.""" - # Create local media with media_id path. + """Test that downloading local media with media_id path serves correct file.""" + # Specifically, that a preexisting file on the legacy media paths can still be + # served from its present location. + media_id = self._create_local_media_with_media_id_path() channel = self.make_request( "GET", @@ -5277,12 +5327,9 @@ def test_download_local_media_with_media_id_path_saves_file_in_sha256_path( ) assert channel.code == 200 assert channel.result["body"] == SMALL_PNG - assert isinstance(self.repo, MediaRepository) - # Downloading local media with media_id path doesn't recreate the file in the sha256 path. - assert os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) def test_download_local_media_with_sha256_path(self) -> None: - """Test that downloading local media with sha256 path saves file in sha256 path.""" + """Test that downloading local media with sha256 path serves correct file.""" media_id = self._create_local_media_with_sha256_path() channel = self.make_request( "GET", @@ -5292,9 +5339,6 @@ def test_download_local_media_with_sha256_path(self) -> None: ) assert channel.code == 200 assert channel.result["body"] == SMALL_PNG - assert isinstance(self.repo, MediaRepository) - assert os.path.exists(self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256)) - assert not os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) def test_copy_remote_media_with_sha256_path(self) -> None: """Test that copying remote media saves file in sha256 path.""" @@ -5323,7 +5367,7 @@ def test_copy_remote_media_with_sha256_path(self) -> None: ) assert remote_media is not None assert remote_media.sha256 == SMALL_PNG_SHA256 - assert remote_media.filesystem_id == SMALL_PNG_SHA256 + copied_media = self.get_success( self.repo.store.get_local_media(copied_media_mxc_uri.media_id) ) @@ -5333,6 +5377,7 @@ def test_copy_remote_media_with_sha256_path(self) -> None: # Check if the file is saved in the sha256 path only. assert isinstance(self.repo, MediaRepository) assert os.path.exists(self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256)) + # It should NOT exist in either of the legacy paths assert not os.path.exists( self.repo.filepaths.remote_media_filepath( remote_server, remote_media.filesystem_id @@ -5373,9 +5418,8 @@ def test_copy_local_media_with_media_id(self) -> None: # The copy should create a new media id, and recreate the file in the sha256 path. assert isinstance(self.repo, MediaRepository) assert os.path.exists(self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256)) - assert os.path.exists( - self.repo.filepaths.local_media_filepath(media_id) - ) # Copy operation does not delete the original file with media_id path. + assert os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) + # Copy operation does not delete the original file with media_id path. assert not os.path.exists( self.repo.filepaths.local_media_filepath(copied_media_mxc_uri.media_id) ) @@ -5435,13 +5479,14 @@ def test_upload_local_media_with_sha256_path(self) -> None: _, media_id = res.rsplit("/", maxsplit=1) # Check if the media is saved in the media table. - media = self.get_success(self.repo.store.get_local_media(media_id)) - assert media is not None - assert media.sha256 == SMALL_PNG_SHA256 + local_media_info = self.get_success(self.repo.store.get_local_media(media_id)) + assert local_media_info is not None + assert local_media_info.sha256 == SMALL_PNG_SHA256 # Check if the file is saved in the sha256 path. assert isinstance(self.repo, MediaRepository) assert os.path.exists(self.repo.filepaths.filepath_sha(SMALL_PNG_SHA256)) + # It should not be created on the legacy path assert not os.path.exists(self.repo.filepaths.local_media_filepath(media_id)) def test_download_local_thumbnail_with_media_id(self) -> None: @@ -5459,17 +5504,6 @@ def test_download_local_thumbnail_with_media_id(self) -> None: assert channel.code == 200, channel.result assert channel.result["body"] is not None - # Check if the thumbnail is saved in the media_id path, not in the sha256 path. - assert isinstance(self.repo, MediaRepository) - assert os.path.exists( - self.repo.filepaths.local_media_thumbnail( - media_id, 1, 1, "image/png", "scale" - ) - ) - assert not os.path.exists( - self.repo.filepaths.thumbnail_sha(media_id, 1, 1, "image/png", "scale") - ) - def test_download_local_thumbnail_with_sha256_path(self) -> None: """Test downloading thumbnail of local media with sha256 path.""" # Create local media with sha256 path. From f4a92c00caffa6d4f76c73916996e429c0230a9b Mon Sep 17 00:00:00 2001 From: Jason Little Date: Fri, 21 Nov 2025 14:06:18 -0600 Subject: [PATCH 2/3] Cargo clippy decided this was needed I guess because of a rust update? --- rust/src/http_client.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/src/http_client.rs b/rust/src/http_client.rs index b6cdf98f55..4ba8c438a7 100644 --- a/rust/src/http_client.rs +++ b/rust/src/http_client.rs @@ -137,7 +137,7 @@ fn get_runtime<'a>(reactor: &Bound<'a, PyAny>) -> PyResult = OnceCell::new(); /// Access to the `twisted.internet.defer` module. -fn defer(py: Python<'_>) -> PyResult<&Bound> { +fn defer(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { Ok(DEFER .get_or_try_init(|| py.import("twisted.internet.defer").map(Into::into))? .bind(py)) From 8fb8e19d82344be6ebbdd8ac0fd4d2a2177f801c Mon Sep 17 00:00:00 2001 From: Jason Little Date: Mon, 3 Nov 2025 07:33:56 -0600 Subject: [PATCH 3/3] Pin installed versions of python to 3.13 for linting and format checking github actions Somehow, PyO3 version 0.24.1 is being used which only has support for Python <= 3.13 even though the version pinned in Cargo.toml is 0.25.1. It is unknown where exactly this version of PyO3 is coming from, but caching oddities are suspected Revert this after Synapse is overall bumped to Python 3.14 in v1.142.0 --- .github/workflows/famedly-tests.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/famedly-tests.yml b/.github/workflows/famedly-tests.yml index a328a6f4f6..fac74cdd6d 100644 --- a/.github/workflows/famedly-tests.yml +++ b/.github/workflows/famedly-tests.yml @@ -25,7 +25,7 @@ jobs: - uses: Swatinem/rust-cache@68b3cb7503c78e67dae8373749990a220eb65352 - uses: matrix-org/setup-python-poetry@v2 with: - python-version: "3.x" + python-version: "3.13" poetry-version: "2.1.1" extras: "all" - run: poetry run scripts-dev/generate_sample_config.sh --check @@ -49,7 +49,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: "3.13" - run: .ci/scripts/check_lockfile.py lint: @@ -63,6 +63,7 @@ jobs: uses: matrix-org/setup-python-poetry@v2 with: poetry-version: "2.1.1" + python-version: "3.13" install-project: "false" - name: Run ruff check @@ -91,6 +92,7 @@ jobs: # https://github.com/matrix-org/synapse/pull/15376#issuecomment-1498983775 # To make CI green, err towards caution and install the project. install-project: "true" + python-version: "3.13" poetry-version: "2.1.1" # Cribbed from @@ -124,6 +126,7 @@ jobs: - uses: matrix-org/setup-python-poetry@v2 with: poetry-version: "2.1.1" + python-version: "3.13" extras: "all" - run: poetry run scripts-dev/check_pydantic_models.py @@ -161,7 +164,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: "3.13" - run: "pip install rstcheck" - run: "rstcheck --report-level=WARNING README.rst"