@@ -220,18 +220,37 @@ function resolveParamDitModel(name: string | undefined): string {
220220// Audio path resolution (for reference/source audio inputs)
221221// ---------------------------------------------------------------------------
222222
/**
 * Joins a path taken from below the public "/audio/" prefix onto AUDIO_DIR and
 * verifies that the result is still located inside AUDIO_DIR.
 *
 * @param relativePath - The part of the URL path that follows "/audio/".
 * @returns `path.join(AUDIO_DIR, relativePath)`.
 * @throws Error when the joined path would leave AUDIO_DIR (e.g. via "../").
 */
function joinUnderAudioDir(relativePath: string): string {
  const joined = path.join(AUDIO_DIR, relativePath);
  // Compare fully resolved paths so that ".." segments cannot hide an escape.
  const fromRoot = path.relative(path.resolve(AUDIO_DIR), path.resolve(joined));
  const escapesRoot =
    fromRoot === '..' ||
    fromRoot.startsWith(`..${path.sep}`) ||
    path.isAbsolute(fromRoot); // different drive/root on Windows
  if (escapesRoot) {
    throw new Error(`Audio path escapes the audio directory: ${relativePath}`);
  }
  return joined;
}

/**
 * Resolves a UI audio URL (e.g. "/audio/reference-tracks/user/file.mp3") or a
 * filesystem path to the local path that is handed to the spawned binary.
 *
 * Supported input formats:
 *   - "/audio/<rest>"            relative public URL; <rest> is joined with AUDIO_DIR
 *   - "http[s]://host/audio/…"   absolute URL whose path starts with /audio/; the
 *                                path is percent-decoded before it is joined, because
 *                                the URL parser encodes spaces and non-ASCII characters
 *   - anything else              returned unchanged (typically an absolute path)
 *
 * Every resolution is logged with the "[resolveAudio]" prefix.
 *
 * @param audioUrl - Public audio URL or filesystem path.
 * @returns The filesystem path for `audioUrl`, or `audioUrl` itself when it does
 *          not match one of the "/audio/" forms.
 * @throws Error when an "/audio/" URL resolves to a location outside AUDIO_DIR.
 */
function resolveAudioPath(audioUrl: string): string {
  const audioPrefix = '/audio/';

  // Relative public URL produced by the UI player or upload endpoint.
  if (audioUrl.startsWith(audioPrefix)) {
    const resolved = joinUnderAudioDir(audioUrl.slice(audioPrefix.length));
    console.log(`[resolveAudio] ${audioUrl} → ${resolved}`);
    return resolved;
  }

  // Full HTTP(S) URL: extract and decode the path component.
  if (audioUrl.startsWith('http://') || audioUrl.startsWith('https://')) {
    let pathname: string | undefined;
    try {
      pathname = decodeURIComponent(new URL(audioUrl).pathname);
    } catch {
      // Unparseable URL or malformed percent-escape: fall through to pass-through.
      pathname = undefined;
    }
    // The join happens outside the try block so a containment error is not swallowed.
    if (pathname !== undefined && pathname.startsWith(audioPrefix)) {
      const resolved = joinUnderAudioDir(pathname.slice(audioPrefix.length));
      console.log(`[resolveAudio] ${audioUrl} → ${resolved}`);
      return resolved;
    }
  }

  // Nothing to translate: hand the value to the caller unchanged.
  if (path.isAbsolute(audioUrl)) {
    console.log(`[resolveAudio] ${audioUrl} → (absolute path, no change)`);
  } else {
    console.warn(`[resolveAudio] ${audioUrl} → (not an /audio/ URL or absolute path, no change)`);
  }
  return audioUrl;
}
237256
@@ -446,61 +465,105 @@ async function runViaSpawn(
446465 const tmpDir = path . join ( AUDIO_DIR , `_tmp_${ jobId } ` ) ;
447466 await mkdir ( tmpDir , { recursive : true } ) ;
448467
468+ // ── Determine generation mode ────────────────────────────────────────────
469+ // Explicit task type drives mode selection; source audio / audio codes act
470+ // as secondary signals for backward compatibility.
471+ const taskType = params . taskType || 'text2music' ;
472+ const isCover = taskType === 'cover' || taskType === 'audio2audio' ;
473+ const isRepaint = taskType === 'repaint' ;
474+ // Passthrough: taskType explicitly set, or audio codes provided without
475+ // a source audio file (legacy callers that omit the taskType field).
476+ const isPassthru = taskType === 'passthrough' || Boolean ( params . audioCodes && ! params . sourceAudioUrl ) ;
477+ // LLM (ace-qwen3) is only needed for plain text-to-music generation.
478+ // Cover, repaint, and passthrough all skip it.
479+ const skipLm = isCover || isRepaint || isPassthru ;
480+
481+ // ── Debug: log what the UI/API client requested ──────────────────────────
482+ console . log (
483+ `[Job ${ jobId } ] Request received:` +
484+ `\n mode = ${ taskType } ` +
485+ `\n customMode = ${ params . customMode } ` +
486+ `\n ditModel = ${ params . ditModel || '(default)' } ` +
487+ `\n sourceAudio = ${ params . sourceAudioUrl || 'none' } ` +
488+ `\n repaintRegion = [${ params . repaintingStart ?? 'start' } , ${ params . repaintingEnd ?? 'end' } ]` +
489+ `\n coverStrength = ${ params . audioCoverStrength ?? 'n/a' } ` +
490+ `\n steps = ${ params . inferenceSteps ?? 8 } ` +
491+ `\n guidance = ${ params . guidanceScale ?? 0.0 } ` +
492+ `\n shift = ${ params . shift ?? 3.0 } ` +
493+ `\n skipLm = ${ skipLm } `
494+ ) ;
495+
449496 try {
450497 // ── Build request.json ─────────────────────────────────────────────────
451- // ace-qwen3 reads generation parameters from a JSON file. Only `caption`
452- // is strictly required; all other fields default to sensible values.
498+ // The JSON file is read by ace-qwen3 (text2music) or dit-vae directly
499+ // (cover / repaint / passthrough). Only include the fields each binary
500+ // actually understands so the format stays clean and predictable.
453501 const caption = params . style || 'pop music' ;
454502 const prompt = params . customMode ? caption : ( params . songDescription || caption ) ;
455- // Instrumental: pass the special "[Instrumental]" lyrics string so the LLM
503+ // Instrumental: pass the special "[Instrumental]" lyrics marker so the LLM
456504 // skips lyrics generation (as documented in the acestep.cpp README).
457505 const lyrics = params . instrumental ? '[Instrumental]' : ( params . lyrics || '' ) ;
458506
507+ // Fields common to all modes (understood by both ace-qwen3 and dit-vae)
459508 const requestJson : Record < string , unknown > = {
460- caption : prompt ,
509+ caption : prompt ,
461510 lyrics,
462- vocal_language : params . vocalLanguage || 'unknown' ,
463- seed : params . randomSeed !== false ? - 1 : ( params . seed ?? - 1 ) ,
464- lm_temperature : params . lmTemperature ?? 0.85 ,
465- lm_cfg_scale : params . lmCfgScale ?? 2.0 ,
466- lm_top_p : params . lmTopP ?? 0.9 ,
467- lm_top_k : params . lmTopK ?? 0 ,
468- lm_negative_prompt : params . lmNegativePrompt || '' ,
469- inference_steps : params . inferenceSteps ?? 8 ,
470- guidance_scale : params . guidanceScale ?? 0.0 ,
471- shift : params . shift ?? 3.0 ,
511+ seed : params . randomSeed !== false ? - 1 : ( params . seed ?? - 1 ) ,
512+ inference_steps : params . inferenceSteps ?? 8 ,
513+ guidance_scale : params . guidanceScale ?? 0.0 ,
514+ shift : params . shift ?? 3.0 ,
472515 } ;
473- // Optional metadata (0 / empty = let the LLM fill it)
474- if ( params . bpm && params . bpm > 0 ) requestJson . bpm = params . bpm ;
475- if ( params . duration && params . duration > 0 ) requestJson . duration = params . duration ;
476- if ( params . keyScale ) requestJson . keyscale = params . keyScale ;
477- if ( params . timeSignature ) requestJson . timesignature = params . timeSignature ;
478- // Passthrough: skip the LLM when audio codes are already provided
479- if ( params . audioCodes ) requestJson . audio_codes = params . audioCodes ;
480- // Cover/audio-to-audio: strength of the source audio influence on the output
481- // (ignored in repaint mode — the mask handles everything)
482- if ( params . audioCoverStrength !== undefined && params . taskType !== 'repaint' ) {
483- requestJson . audio_cover_strength = params . audioCoverStrength ;
484- }
485- // Repaint mode: regenerate a time region while preserving the rest.
486- // Activated by setting repainting_start and/or repainting_end in the JSON.
487- // Both default to -1 (inactive): -1 on start means 0s, -1 on end means source duration.
488- if ( params . taskType === 'repaint' && params . sourceAudioUrl ) {
489- requestJson . repainting_start = params . repaintingStart ?? - 1 ;
490- requestJson . repainting_end = params . repaintingEnd ?? - 1 ;
516+
517+ // Optional music metadata (0 / empty → binary fills it in)
518+ if ( params . bpm && params . bpm > 0 ) requestJson . bpm = params . bpm ;
519+ if ( params . duration && params . duration > 0 ) requestJson . duration = params . duration ;
520+ if ( params . keyScale ) requestJson . keyscale = params . keyScale ;
521+ if ( params . timeSignature ) requestJson . timesignature = params . timeSignature ;
522+
523+ if ( skipLm ) {
524+ // ── Cover / repaint / passthrough: ace-qwen3 is skipped ─────────────
525+ // Add only the mode-specific fields that dit-vae cares about.
526+ if ( isPassthru ) {
527+ if ( ! params . audioCodes ) {
528+ // Passthrough requires pre-computed codes — fail early with a clear message
529+ throw new Error ( "task_type='passthrough' requires pre-computed audio_codes" ) ;
530+ }
531+ requestJson . audio_codes = params . audioCodes ;
532+ } else if ( isCover ) {
533+ // Cover / audio-to-audio: strength of the source audio influence (0–1)
534+ if ( params . audioCoverStrength !== undefined ) {
535+ requestJson . audio_cover_strength = params . audioCoverStrength ;
536+ }
537+ } else if ( isRepaint ) {
538+ // Repaint: regenerate only the specified time region; preserve the rest.
539+ // Both default to -1: start=-1 → 0 s, end=-1 → full source duration.
540+ // Note: sourceAudioUrl is guaranteed here — validated in processGeneration.
541+ requestJson . repainting_start = params . repaintingStart ?? - 1 ;
542+ requestJson . repainting_end = params . repaintingEnd ?? - 1 ;
543+ }
544+ } else {
545+ // ── Text-to-music: include LM parameters for ace-qwen3 ──────────────
546+ requestJson . vocal_language = params . vocalLanguage || 'unknown' ;
547+ requestJson . lm_temperature = params . lmTemperature ?? 0.85 ;
548+ requestJson . lm_cfg_scale = params . lmCfgScale ?? 2.0 ;
549+ requestJson . lm_top_p = params . lmTopP ?? 0.9 ;
550+ requestJson . lm_top_k = params . lmTopK ?? 0 ;
551+ requestJson . lm_negative_prompt = params . lmNegativePrompt || '' ;
491552 }
492553
493554 const requestPath = path . join ( tmpDir , 'request.json' ) ;
494555 await writeFile ( requestPath , JSON . stringify ( requestJson , null , 2 ) ) ;
556+ console . log ( `[Job ${ jobId } ] Request JSON written to ${ requestPath } :` ) ;
557+ console . log ( JSON . stringify ( requestJson , null , 2 ) ) ;
495558
496559 // ── Step 1: ace-qwen3 — LLM (lyrics + audio codes) ────────────────────
497560 // Skipped when:
498- // • audio_codes are provided (passthrough) — codes are already known
499- // • sourceAudioUrl is provided (cover/ audio-to-audio) — dit-vae derives
500- // codes directly from the source audio; running ace-qwen3 is not needed
561+ // • taskType is cover / audio2audio / repaint — dit-vae derives tokens
562+ // directly from the source audio; running ace-qwen3 is not needed
563+ // • taskType is passthrough — audio codes are already provided
501564 let enrichedPaths : string [ ] = [ ] ;
502565
503- if ( ! params . audioCodes && ! params . sourceAudioUrl ) {
566+ if ( ! skipLm ) {
504567 job . stage = 'LLM: generating lyrics and audio codes…' ;
505568
506569 const lmBin = config . acestep . lmBin ! ;
@@ -513,7 +576,7 @@ async function runViaSpawn(
513576 if ( batchSize > 1 ) lmArgs . push ( '--batch' , String ( batchSize ) ) ;
514577 lmArgs . push ( ...parseExtraArgs ( process . env . ACE_QWEN3_EXTRA_ARGS ) ) ;
515578
516- console . log ( `[Spawn] Job ${ jobId } : ace-qwen3 ${ lmArgs . slice ( 0 , 6 ) . join ( ' ' ) } … ` ) ;
579+ console . log ( `[Job ${ jobId } ] Running ace-qwen3:\n ${ lmBin } ${ lmArgs . join ( ' ' ) } ` ) ;
517580 await runBinary ( lmBin , lmArgs , 'ace-qwen3' , undefined , makeLmProgressHandler ( job ) ) ;
518581
519582 // Collect enriched JSON files produced by ace-qwen3:
@@ -528,24 +591,33 @@ async function runViaSpawn(
528591 if ( enrichedPaths . length === 0 ) {
529592 throw new Error ( 'ace-qwen3 produced no enriched request files' ) ;
530593 }
594+ console . log ( `[Job ${ jobId } ] ace-qwen3 produced ${ enrichedPaths . length } enriched file(s): ${ enrichedPaths . join ( ', ' ) } ` ) ;
531595 } else {
532- // Passthrough: use the original request.json directly
533- // (audio codes provided, or source audio supplied for cover/audio-to-audio mode)
596+ // Cover / repaint / passthrough: pass the original request.json directly
597+ // to dit-vae; no LLM enrichment step needed.
534598 enrichedPaths = [ requestPath ] ;
599+ console . log ( `[Job ${ jobId } ] LLM step skipped (mode=${ taskType } ); passing request.json directly to dit-vae` ) ;
535600 }
536601
537602 // ── Step 2: dit-vae — DiT + VAE (audio synthesis) ──────────────────────
538603 job . stage = 'DiT+VAE: synthesising audio…' ;
539604
540- const ditVaeBin = config . acestep . ditVaeBin ! ;
541- const textEncoderModel = config . acestep . textEncoderModel ;
542- const ditModel = resolveParamDitModel ( params . ditModel ) ;
543- const vaeModel = config . acestep . vaeModel ;
605+ const ditVaeBin = config . acestep . ditVaeBin ! ;
606+ const textEncoderModel = config . acestep . textEncoderModel ;
607+ const ditModel = resolveParamDitModel ( params . ditModel ) ;
608+ const vaeModel = config . acestep . vaeModel ;
544609
545610 if ( ! textEncoderModel ) throw new Error ( 'Text-encoder model not found — run models.sh first' ) ;
546611 if ( ! ditModel ) throw new Error ( 'DiT model not found — run models.sh first' ) ;
547612 if ( ! vaeModel ) throw new Error ( 'VAE model not found — run models.sh first' ) ;
548613
614+ console . log (
615+ `[Job ${ jobId } ] Resolved model paths:` +
616+ `\n text-encoder = ${ textEncoderModel } ` +
617+ `\n dit = ${ ditModel } ` +
618+ `\n vae = ${ vaeModel } `
619+ ) ;
620+
549621 const ditArgs : string [ ] = [
550622 '--request' , ...enrichedPaths ,
551623 '--text-encoder' , textEncoderModel ,
@@ -556,10 +628,14 @@ async function runViaSpawn(
556628 const batchSize = Math . min ( Math . max ( params . batchSize ?? 1 , 1 ) , 8 ) ;
557629 if ( batchSize > 1 ) ditArgs . push ( '--batch' , String ( batchSize ) ) ;
558630
559- if ( params . sourceAudioUrl ) ditArgs . push ( '--src-audio' , resolveAudioPath ( params . sourceAudioUrl ) ) ;
631+ // Cover and repaint modes both require a source audio file
632+ if ( params . sourceAudioUrl ) {
633+ const srcAudioPath = resolveAudioPath ( params . sourceAudioUrl ) ;
634+ ditArgs . push ( '--src-audio' , srcAudioPath ) ;
635+ }
560636 ditArgs . push ( ...parseExtraArgs ( process . env . DIT_VAE_EXTRA_ARGS ) ) ;
561637
562- console . log ( `[Spawn] Job ${ jobId } : dit-vae ${ ditArgs . slice ( 0 , 6 ) . join ( ' ' ) } … ` ) ;
638+ console . log ( `[Job ${ jobId } ] Running dit-vae:\n ${ ditVaeBin } ${ ditArgs . join ( ' ' ) } ` ) ;
563639 await runBinary ( ditVaeBin , ditArgs , 'dit-vae' , undefined , makeDitVaeProgressHandler ( job ) ) ;
564640
565641 // ── Collect generated WAV files ─────────────────────────────────────────
@@ -608,7 +684,7 @@ async function runViaSpawn(
608684 status : 'succeeded' ,
609685 } ;
610686 job . rawResponse = enrichedMeta ;
611- console . log ( `[Spawn] Job ${ jobId } : completed with ${ audioUrls . length } audio file(s)` ) ;
687+ console . log ( `[Job ${ jobId } ] Completed successfully with ${ audioUrls . length } audio file(s): ${ audioUrls . join ( ', ' ) } ` ) ;
612688
613689 // Clean up tmp directory
614690 await rm ( tmpDir , { recursive : true , force : true } ) . catch ( ( ) => { /* best-effort */ } ) ;
@@ -843,16 +919,28 @@ async function processGeneration(
843919 job . status = 'running' ;
844920 job . stage = 'Starting generation...' ;
845921
922+ const mode = useSpawnMode ( params ) ? 'spawn' : 'http' ;
923+ console . log (
924+ `[Job ${ jobId } ] Starting generation (${ mode } mode):` +
925+ `\n taskType = ${ params . taskType || 'text2music' } ` +
926+ `\n customMode = ${ params . customMode } ` +
927+ `\n ditModel = ${ params . ditModel || '(default)' } ` +
928+ `\n sourceAudio = ${ params . sourceAudioUrl || 'none' } ` +
929+ `\n audioCodes = ${ params . audioCodes ? '[provided]' : 'none' } `
930+ ) ;
931+
846932 if ( ( params . taskType === 'cover' || params . taskType === 'audio2audio' ) &&
847933 ! params . sourceAudioUrl && ! params . audioCodes ) {
848934 job . status = 'failed' ;
849935 job . error = `task_type='${ params . taskType } ' requires a source audio or audio codes` ;
936+ console . error ( `[Job ${ jobId } ] Validation failed: ${ job . error } ` ) ;
850937 return ;
851938 }
852939
853940 if ( params . taskType === 'repaint' && ! params . sourceAudioUrl ) {
854941 job . status = 'failed' ;
855942 job . error = "task_type='repaint' requires a source audio (--src-audio)" ;
943+ console . error ( `[Job ${ jobId } ] Validation failed: ${ job . error } ` ) ;
856944 return ;
857945 }
858946
@@ -864,9 +952,10 @@ async function processGeneration(
864952 await runViaHttp ( jobId , params , job ) ;
865953 }
866954 } catch ( err ) {
867- console . error ( `Job ${ jobId } failed:` , err ) ;
955+ const errMsg = err instanceof Error ? err . message : String ( err ) ;
956+ console . error ( `[Job ${ jobId } ] Generation failed: ${ errMsg } ` ) ;
868957 job . status = 'failed' ;
869- job . error = err instanceof Error ? err . message : 'Generation failed' ;
958+ job . error = errMsg || 'Generation failed' ;
870959 }
871960}
872961
0 commit comments