From 5ea32baffb664af0dd98d9eb998c1fb5536a9e41 Mon Sep 17 00:00:00 2001 From: digital-pro Date: Sun, 1 Feb 2026 15:30:49 -0800 Subject: [PATCH 1/2] optimize media asset bucket listing Align bucket prefixes with audio/visual layout to reduce listing scope. --- .../src/tasks/shared/helpers/getBucketName.ts | 10 +++++- .../tasks/shared/helpers/getMediaAssets.ts | 35 +++++++++++-------- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/task-launcher/src/tasks/shared/helpers/getBucketName.ts b/task-launcher/src/tasks/shared/helpers/getBucketName.ts index 4a7deabe..20a34c61 100644 --- a/task-launcher/src/tasks/shared/helpers/getBucketName.ts +++ b/task-launcher/src/tasks/shared/helpers/getBucketName.ts @@ -10,5 +10,13 @@ export function getBucketName( ? TASK_BUCKET_NAMES_DEV[assetType as keyof typeof TASK_BUCKET_NAMES_DEV] : TASK_BUCKET_NAMES_PROD[assetType as keyof typeof TASK_BUCKET_NAMES_PROD]; - return `${bucket}/${assetType === 'audio' ? language : taskName}`; + if (assetType === 'audio') { + return language ? `${bucket}/${language}` : bucket; + } + + if (assetType === 'visual') { + return `${bucket}/${taskName}`; + } + + return `${bucket}/${taskName}`; } diff --git a/task-launcher/src/tasks/shared/helpers/getMediaAssets.ts b/task-launcher/src/tasks/shared/helpers/getMediaAssets.ts index 712f60d5..52b2172e 100644 --- a/task-launcher/src/tasks/shared/helpers/getMediaAssets.ts +++ b/task-launcher/src/tasks/shared/helpers/getMediaAssets.ts @@ -1,5 +1,3 @@ -//@ts-ignore -import { getDevice } from '@bdelab/roar-utils'; import { camelize } from './camelize'; type CategorizedObjectsType = { @@ -30,18 +28,23 @@ export async function getMediaAssets( const bucket = parts[0]; const folder = parts.slice(1).join('/'); - const baseUrl = `https://storage.googleapis.com/storage/v1/b/${bucket}/o?prefix=${folder}/`; + const prefix = folder ? `${folder}/` : ''; + const baseUrl = `https://storage.googleapis.com/storage/v1/b/${bucket}/o`; + const params = new URLSearchParams({ prefix }); let url = baseUrl; if (nextPageToken) { - url += `&pageToken=${nextPageToken}`; + params.set('pageToken', nextPageToken); + } + if (params.toString()) { + url += `?${params.toString()}`; } const response = await fetch(url); const data: ResponseDataType = await response.json(); data.items.forEach((item) => { - if (isLanguageAndDeviceValid(item.name, taskName, language) && isWhitelisted(item.name, whitelist)) { + if (isLanguageAndTaskValid(item.name, language, taskName) && isWhitelisted(item.name, whitelist)) { const contentType = item.contentType; const id = item.name; const path = `https://storage.googleapis.com/${bucket}/${id}`; @@ -59,26 +62,28 @@ export async function getMediaAssets( }); if (data.nextPageToken) { - return getMediaAssets(bucketName, whitelist, taskName, language, data.nextPageToken, categorizedObjects); + return getMediaAssets(bucketName, whitelist, language, taskName, data.nextPageToken, categorizedObjects); } else { return categorizedObjects; } } -function isLanguageAndDeviceValid(filePath: string, languageCode: string, taskName: string) { +function isLanguageAndTaskValid(filePath: string, languageCode: string, taskName: string) { const parts = filePath.split('/'); - - if (parts.length !== 3) { + if (parts.length < 3) { return false; - } else if (parts[0] === 'visual') { - // visual assets have task prefix + } + + const assetType = parts[0]; + if (assetType === 'audio') { + return parts[1] === languageCode && parts[2].length !== 0; + } + + if (assetType === 'visual') { return parts[1] === taskName && parts[2].length !== 0; - } else if (parts[0] === 'audio') { - // audio assets have language prefix - return parts[1] == languageCode && parts[2].length !== 0; } - return false; // Not a valid path + return false; } // TODO: allow nested whitelisting (whitelisting within an already whitelisted folder) From 1dee8f20bc04855981ecb1fefced3315caed4e00 Mon Sep 17 00:00:00 2001 From: Zach Watson Date: Thu, 26 Feb 2026 14:27:53 -0800 Subject: [PATCH 2/2] use assetsPerTask.json to avoid getting extra language asset metadata --- task-launcher/src/index.ts | 19 ++--- .../src/tasks/shared/helpers/getBucketName.ts | 4 - .../tasks/shared/helpers/getMediaAssets.ts | 83 +++++++++++-------- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/task-launcher/src/index.ts b/task-launcher/src/index.ts index c2185b2f..28ef159f 100644 --- a/task-launcher/src/index.ts +++ b/task-launcher/src/index.ts @@ -56,9 +56,16 @@ export class TaskLauncher { const languageAudioBucket = getBucketName('shared', isDev, 'audio', language); const sharedAudioBucket = getBucketName('shared', isDev, 'audio', 'shared'); + await getAssetsPerTask(isDev); + + const taskAudioAssetNames = [ + ...taskStore().assetsPerTask[taskName].audio, + ...taskStore().assetsPerTask.shared.audio, + ]; + try { // will avoid language folder if not provided - languageAudioAssets = await getMediaAssets(languageAudioBucket, {}, taskName, language); + languageAudioAssets = await getMediaAssets(languageAudioBucket, {}, taskName, language, taskAudioAssetNames); sharedAudioAssets = await getMediaAssets(sharedAudioBucket, {}, taskName, 'shared'); taskVisualAssets = await getMediaAssets(taskVisualBucket, {}, taskName, language); sharedVisualAssets = await getMediaAssets(sharedVisualBucket, {}, 'shared', language); @@ -77,16 +84,6 @@ export class TaskLauncher { await getCorpus(config, isDev); } - await getAssetsPerTask(isDev); - - const taskAudioAssetNames = [ - ...taskStore().assetsPerTask[taskName].audio, - ...taskStore().assetsPerTask.shared.audio, - ]; - - // filter out language audio not relevant to current task - languageAudioAssets = filterMedia(languageAudioAssets, [], taskAudioAssetNames, []); - mediaAssets = combineMediaAssets([languageAudioAssets, sharedAudioAssets, taskVisualAssets, sharedVisualAssets]); // Expose resolved media assets for e2e validation (dev/test only) diff --git a/task-launcher/src/tasks/shared/helpers/getBucketName.ts b/task-launcher/src/tasks/shared/helpers/getBucketName.ts index 20a34c61..db453fc0 100644 --- a/task-launcher/src/tasks/shared/helpers/getBucketName.ts +++ b/task-launcher/src/tasks/shared/helpers/getBucketName.ts @@ -14,9 +14,5 @@ export function getBucketName( return language ? `${bucket}/${language}` : bucket; } - if (assetType === 'visual') { - return `${bucket}/${taskName}`; - } - return `${bucket}/${taskName}`; } diff --git a/task-launcher/src/tasks/shared/helpers/getMediaAssets.ts b/task-launcher/src/tasks/shared/helpers/getMediaAssets.ts index 52b2172e..d888075e 100644 --- a/task-launcher/src/tasks/shared/helpers/getMediaAssets.ts +++ b/task-launcher/src/tasks/shared/helpers/getMediaAssets.ts @@ -19,56 +19,67 @@ type ResponseDataType = { export async function getMediaAssets( bucketName: string, whitelist: Record = {}, - language: string, taskName: string, + language: string, + requiredAssetNames?: string[], nextPageToken = '', - categorizedObjects: CategorizedObjectsType = { images: {}, audio: {}, video: {} }, + categorizedObjects: CategorizedObjectsType = { images: {}, audio: {}, video: {} } ) { const parts = bucketName.split('/'); const bucket = parts[0]; const folder = parts.slice(1).join('/'); - const prefix = folder ? `${folder}/` : ''; - const baseUrl = `https://storage.googleapis.com/storage/v1/b/${bucket}/o`; - const params = new URLSearchParams({ prefix }); - let url = baseUrl; - if (nextPageToken) { - params.set('pageToken', nextPageToken); - } - if (params.toString()) { - url += `?${params.toString()}`; - } + if (requiredAssetNames) { + requiredAssetNames.forEach((assetName) => { + const path = `https://storage.googleapis.com/${bucket}/${prefix}${assetName}.mp3`; - const response = await fetch(url); - const data: ResponseDataType = await response.json(); - - data.items.forEach((item) => { - if (isLanguageAndTaskValid(item.name, language, taskName) && isWhitelisted(item.name, whitelist)) { - const contentType = item.contentType; - const id = item.name; - const path = `https://storage.googleapis.com/${bucket}/${id}`; - const fileName = id.split('/').pop()?.split('.')[0] || ''; - const camelCaseFileName = camelize(fileName); - - if (contentType.startsWith('image/')) { - categorizedObjects.images[camelCaseFileName] = path; - } else if (contentType.startsWith('audio/')) { - categorizedObjects.audio[camelCaseFileName] = path; - } else if (contentType.startsWith('video/')) { - categorizedObjects.video[camelCaseFileName] = path; - } - } - }); + categorizedObjects.audio[camelize(assetName)] = path; + }); - if (data.nextPageToken) { - return getMediaAssets(bucketName, whitelist, language, taskName, data.nextPageToken, categorizedObjects); - } else { return categorizedObjects; + } else { + const baseUrl = `https://storage.googleapis.com/storage/v1/b/${bucket}/o`; + const params = new URLSearchParams({ prefix, fields: 'items(name,contentType),nextPageToken' }); + + let url = baseUrl; + if (nextPageToken) { + params.set('pageToken', nextPageToken); + } + if (params.toString()) { + url += `?${params.toString()}`; + } + + const response = await fetch(url); + const data: ResponseDataType = await response.json(); + + data.items.forEach((item) => { + if (isLanguageAndTaskValid(item.name, taskName, language) && isWhitelisted(item.name, whitelist)) { + const contentType = item.contentType; + const id = item.name; + const path = `https://storage.googleapis.com/${bucket}/${id}`; + const fileName = id.split('/').pop()?.split('.')[0] || ''; + const camelCaseFileName = camelize(fileName); + + if (contentType.startsWith('image/')) { + categorizedObjects.images[camelCaseFileName] = path; + } else if (contentType.startsWith('audio/')) { + categorizedObjects.audio[camelCaseFileName] = path; + } else if (contentType.startsWith('video/')) { + categorizedObjects.video[camelCaseFileName] = path; + } + } + }); + + if (data.nextPageToken) { + return getMediaAssets(bucketName, whitelist, taskName, language, requiredAssetNames, data.nextPageToken, categorizedObjects); + } else { + return categorizedObjects; + } } } -function isLanguageAndTaskValid(filePath: string, languageCode: string, taskName: string) { +function isLanguageAndTaskValid(filePath: string, taskName: string, languageCode: string) { const parts = filePath.split('/'); if (parts.length < 3) { return false;