From a51ee95ec8abcaa100d1fd4846078c634d366268 Mon Sep 17 00:00:00 2001 From: Josh Date: Sun, 1 Mar 2026 11:39:26 -0500 Subject: [PATCH 1/3] refactor(files_external/s3): clarify doesDirectoryExist flow and docs Improve readability of doesDirectoryExist() in AmazonS3 by clarifying comments, variable names, and control flow without changing behavior. Some notes for follow-ups. Signed-off-by: Josh --- .../lib/Lib/Storage/AmazonS3.php | 56 +++++++++++++------ 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/apps/files_external/lib/Lib/Storage/AmazonS3.php b/apps/files_external/lib/Lib/Storage/AmazonS3.php index 4da56c9527d9b..42f04f0ea52cf 100644 --- a/apps/files_external/lib/Lib/Storage/AmazonS3.php +++ b/apps/files_external/lib/Lib/Storage/AmazonS3.php @@ -135,54 +135,76 @@ private function headObject(string $key): array|false { /** * Return true if directory exists * - * There are no folders in s3. A folder like structure could be archived - * by prefixing files with the folder name. + * There are no folders in S3. A folder-like structure is represented either + * by object keys sharing a prefix, or by an explicit directory marker object. * - * Implementation from flysystem-aws-s3-v3: + * Implementation inspired by flysystem-aws-s3-v3: * https://github.com/thephpleague/flysystem-aws-s3-v3/blob/8241e9cc5b28f981e0d24cdaf9867f14c7498ae4/src/AwsS3Adapter.php#L670-L694 * - * @throws \Exception + * @throws S3Exception */ private function doesDirectoryExist(string $path): bool { - if ($path === '.' || $path === '') { + if ($path === '.' || $path === '') { // THE root path always exists return true; } - $path = rtrim($path, '/') . '/'; - if (isset($this->directoryCache[$path])) { + $path = rtrim($path, '/') . '/'; // normalize to S3 directory prefix representation + + if (isset($this->directoryCache[$path])) { // cache hit return $this->directoryCache[$path]; } + try { - // Maybe this isn't an actual key, but a prefix. - // Do a prefix listing of objects to determine. + // Check for any objects with the prefix ("$path/*"). + // Returns both "non-empty" directories and "empty" directory markers in most cases. $result = $this->getConnection()->listObjectsV2([ 'Bucket' => $this->bucket, 'Prefix' => $path, 'MaxKeys' => 1, ]); - if (isset($result['Contents'])) { + $hasPrefixedObjects = isset($result['Contents']); + if ($hasPrefixedObjects) { $this->directoryCache[$path] = true; return true; } - // empty directories have their own object - $object = $this->headObject($path); - - if ($object) { + // Check for an object with the exact key ("$path/"). + // Fallback for edge cases where the "empty" directory marker wasn't returned above for some reason. + // In practice this fallback (probably) rarely needed, but we call it anyway (impacting performance). + // @todo: determine what scenarios this fallback is really needed and consider removing or at least + // turning off by default (gating it). + $directoryMarker = $this->headObject($path); + if ($directoryMarker) { $this->directoryCache[$path] = true; return true; } + + $this->directoryCache[$path] = false; + return false; } catch (S3Exception $e) { if ($e->getStatusCode() >= 400 && $e->getStatusCode() < 500) { + // Treat client-side 4xx as non-existing for cache purposes. $this->directoryCache[$path] = false; } throw $e; } - - $this->directoryCache[$path] = false; - return false; + // CONCERNS/Checklist: + // - Based on ancient version of flysystem + // - helpers to reduce logic duplication and improve clarity + // - general readability (naming, structure, etc.) + // - shared code with other functions + // - similar logic with other functions + // - general robustness + // - handling of non-404s from headObject() + // - necessity / default use of headObject() fallback (performance hit; unnecessary with AWS and probably most others...) + // - Exception handling policy still rethrows 4xx after caching false, but doesn't return false + // - handling of different error scenarios from listObjectsV2() + // - caching interaction (short-lived -- user facing tasks) + // - caching interaction (long-lived -- e.g. scans/non-user tasks) + // - general performance + // - common root checks and path & prefix normalization throughout class } protected function remove(string $path): bool { From d57b69615cae8abae6fd59df31b7899823da7938 Mon Sep 17 00:00:00 2001 From: Josh Date: Sun, 1 Mar 2026 11:52:06 -0500 Subject: [PATCH 2/3] refactor(Storage/S3): tighten existence check on Contents A bit more defensive. Signed-off-by: Josh --- apps/files_external/lib/Lib/Storage/AmazonS3.php | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/apps/files_external/lib/Lib/Storage/AmazonS3.php b/apps/files_external/lib/Lib/Storage/AmazonS3.php index 42f04f0ea52cf..a7d3db3971aad 100644 --- a/apps/files_external/lib/Lib/Storage/AmazonS3.php +++ b/apps/files_external/lib/Lib/Storage/AmazonS3.php @@ -138,9 +138,8 @@ private function headObject(string $key): array|false { * There are no folders in S3. A folder-like structure is represented either * by object keys sharing a prefix, or by an explicit directory marker object. * - * Implementation inspired by flysystem-aws-s3-v3: - * https://github.com/thephpleague/flysystem-aws-s3-v3/blob/8241e9cc5b28f981e0d24cdaf9867f14c7498ae4/src/AwsS3Adapter.php#L670-L694 - * + * Implementation inspired by https://github.com/thephpleague/flysystem-aws-s3-v3/ + * * @throws S3Exception */ private function doesDirectoryExist(string $path): bool { @@ -163,7 +162,7 @@ private function doesDirectoryExist(string $path): bool { 'MaxKeys' => 1, ]); - $hasPrefixedObjects = isset($result['Contents']); + $hasPrefixedObjects = !empty($result['Contents']); if ($hasPrefixedObjects) { $this->directoryCache[$path] = true; return true; From 705e015d11aead509c7d44f668d56030affedd35 Mon Sep 17 00:00:00 2001 From: Josh Date: Sun, 1 Mar 2026 12:07:06 -0500 Subject: [PATCH 3/3] refactor(Storage/S3): add cacheDirectoryExists helper Signed-off-by: Josh --- .../lib/Lib/Storage/AmazonS3.php | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/apps/files_external/lib/Lib/Storage/AmazonS3.php b/apps/files_external/lib/Lib/Storage/AmazonS3.php index a7d3db3971aad..d2cdf048640bd 100644 --- a/apps/files_external/lib/Lib/Storage/AmazonS3.php +++ b/apps/files_external/lib/Lib/Storage/AmazonS3.php @@ -164,8 +164,7 @@ private function doesDirectoryExist(string $path): bool { $hasPrefixedObjects = !empty($result['Contents']); if ($hasPrefixedObjects) { - $this->directoryCache[$path] = true; - return true; + return $this->cacheDirectoryExists($path, true); } // Check for an object with the exact key ("$path/"). @@ -175,35 +174,32 @@ private function doesDirectoryExist(string $path): bool { // turning off by default (gating it). $directoryMarker = $this->headObject($path); if ($directoryMarker) { - $this->directoryCache[$path] = true; - return true; + return $this->cacheDirectoryExists($path, true); } - $this->directoryCache[$path] = false; - return false; + // Not found + return $this->cacheDirectoryExists($path, false); } catch (S3Exception $e) { if ($e->getStatusCode() >= 400 && $e->getStatusCode() < 500) { // Treat client-side 4xx as non-existing for cache purposes. + // @todo: this may be incorrect; still rethrows 4xx after caching false, but doesn't return false. $this->directoryCache[$path] = false; } throw $e; } // CONCERNS/Checklist: - // - Based on ancient version of flysystem - // - helpers to reduce logic duplication and improve clarity - // - general readability (naming, structure, etc.) - // - shared code with other functions - // - similar logic with other functions - // - general robustness + // - handling of different error scenarios from listObjectsV2() // - handling of non-404s from headObject() // - necessity / default use of headObject() fallback (performance hit; unnecessary with AWS and probably most others...) - // - Exception handling policy still rethrows 4xx after caching false, but doesn't return false - // - handling of different error scenarios from listObjectsV2() // - caching interaction (short-lived -- user facing tasks) // - caching interaction (long-lived -- e.g. scans/non-user tasks) - // - general performance - // - common root checks and path & prefix normalization throughout class + // - common root checks and path & prefix normalization (throughout class) + } + + private function cacheDirectoryExists(string $path, bool $exists): bool { + $this->directoryCache[$path] = $exists; + return $exists; } protected function remove(string $path): bool {