diff --git a/.changes/sound-api-2 b/.changes/sound-api-2 new file mode 100644 index 000000000..2e5b0cb30 --- /dev/null +++ b/.changes/sound-api-2 @@ -0,0 +1 @@ +minor type="added" "SoundPlayer API for prepared audio clips with local playback and best-effort remote playback" diff --git a/Sources/LiveKit/Audio/AudioSessionEngineObserver.swift b/Sources/LiveKit/Audio/AudioSessionEngineObserver.swift index caf77c181..dc29d2fcb 100644 --- a/Sources/LiveKit/Audio/AudioSessionEngineObserver.swift +++ b/Sources/LiveKit/Audio/AudioSessionEngineObserver.swift @@ -69,13 +69,15 @@ public class AudioSessionEngineObserver: AudioEngineObserver, Loggable, @uncheck var isAutomaticConfigurationEnabled: Bool = true var isAutomaticDeactivationEnabled: Bool = true - var isPlayoutEnabled: Bool = false - var isRecordingEnabled: Bool = false var isSpeakerOutputPreferred: Bool = true + + var sessionRequirements: [UUID: SessionRequirement] = [:] } let _state = StateSync(State()) + private let sessionRequirementId = UUID() + public var next: (any AudioEngineObserver)? { get { _state.next } set { _state.mutate { $0.next = newValue } } @@ -85,14 +87,64 @@ public class AudioSessionEngineObserver: AudioEngineObserver, Loggable, @uncheck _state.onDidMutate = { [weak self] new, old in guard let self, new.isSpeakerOutputPreferred != old.isSpeakerOutputPreferred else { return } - _ = configureIfNeeded(oldState: old, newState: new) + do { + try configureIfNeeded(oldState: old, newState: new) + } catch { + log("Failed to configure audio session after speaker preference change: \(error)", .error) + } + } + } + + /// Acquires an audio session requirement handle for external ownership. + /// + /// Use this to keep the audio session active from external components + /// (e.g., ``SoundPlayer``) that need playout or recording independently + /// of the WebRTC engine lifecycle. + /// + /// - Throws: ``LiveKitError`` if the audio session fails to configure or activate. + public func acquire(requirement: SessionRequirement) throws -> SessionRequirementHandle { + let id = UUID() + try set(requirement: requirement, for: id) + return SessionRequirementHandle(releaseImpl: { [weak self] in + guard let self else { return } + try removeRequirement(for: id) + }) + } + + private func set(requirement: SessionRequirement, for id: UUID) throws { + try updateRequirements { + if requirement == .none { + $0.removeValue(forKey: id) + } else { + $0[id] = requirement + } + } + } + + fileprivate func removeRequirement(for id: UUID) throws { + try updateRequirements { + $0.removeValue(forKey: id) + } + } + + private func updateRequirements(_ block: (inout [UUID: SessionRequirement]) -> Void) throws { + try _state.mutate { + let oldState = $0 + block(&$0.sessionRequirements) + guard $0.sessionRequirements != oldState.sessionRequirements else { return } + do { + try configureIfNeeded(oldState: oldState, newState: $0) + } catch { + $0 = oldState + throw LiveKitError(.audioSession, message: "Failed to configure audio session") + } } } // MARK: - Audio Session Configuration - private func configureIfNeeded(oldState: State, newState: State) -> Int { - guard newState.isAutomaticConfigurationEnabled else { return 0 } + private func configureIfNeeded(oldState: State, newState: State) throws { + guard newState.isAutomaticConfigurationEnabled else { return } // Deprecated: `customConfigureAudioSessionFunc` overrides the default configuration. // This path does not support error propagation since the legacy func returns Void. @@ -101,20 +153,17 @@ public class AudioSessionEngineObserver: AudioEngineObserver, Loggable, @uncheck let oldLegacy = AudioManager.State(localTracksCount: oldState.isRecordingEnabled ? 1 : 0, remoteTracksCount: oldState.isPlayoutEnabled ? 1 : 0) let newLegacy = AudioManager.State(localTracksCount: newState.isRecordingEnabled ? 1 : 0, remoteTracksCount: newState.isPlayoutEnabled ? 1 : 0) legacyConfigFunc(newLegacy, oldLegacy) - return 0 + return } - do { - try configureAudioSession(oldState: oldState, newState: newState) - return 0 - } catch { - return kAudioEngineErrorFailedToConfigureAudioSession - } + try configureAudioSession(oldState: oldState, newState: newState) } @Sendable private func configureAudioSession(oldState: State, newState: State) throws { let session = AVAudioSession.sharedInstance() + log("configure isRecordingEnabled: \(newState.isRecordingEnabled), isPlayoutEnabled: \(newState.isPlayoutEnabled)") + if (!newState.isPlayoutEnabled && !newState.isRecordingEnabled) && (oldState.isPlayoutEnabled || oldState.isRecordingEnabled) { if newState.isAutomaticDeactivationEnabled { do { @@ -166,38 +215,31 @@ public class AudioSessionEngineObserver: AudioEngineObserver, Loggable, @uncheck // MARK: - AudioEngineObserver public func engineWillEnable(_ engine: AVAudioEngine, isPlayoutEnabled: Bool, isRecordingEnabled: Bool) -> Int { - let result: Int = _state.mutate { - let oldState = $0 - $0.isPlayoutEnabled = isPlayoutEnabled - $0.isRecordingEnabled = isRecordingEnabled - let result = configureIfNeeded(oldState: oldState, newState: $0) - if result != 0 { - // Rollback state on failure so it stays consistent with WebRTC's rollback. - $0 = oldState - } - return result + let requirement = SessionRequirement(isPlayoutEnabled: isPlayoutEnabled, isRecordingEnabled: isRecordingEnabled) + do { + try set(requirement: requirement, for: sessionRequirementId) + } catch { + return kAudioEngineErrorFailedToConfigureAudioSession } - guard result == 0 else { return result } return _state.next?.engineWillEnable(engine, isPlayoutEnabled: isPlayoutEnabled, isRecordingEnabled: isRecordingEnabled) ?? 0 } public func engineDidDisable(_ engine: AVAudioEngine, isPlayoutEnabled: Bool, isRecordingEnabled: Bool) -> Int { let nextResult = _state.next?.engineDidDisable(engine, isPlayoutEnabled: isPlayoutEnabled, isRecordingEnabled: isRecordingEnabled) ?? 0 - let result: Int = _state.mutate { - let oldState = $0 - $0.isPlayoutEnabled = isPlayoutEnabled - $0.isRecordingEnabled = isRecordingEnabled - let result = configureIfNeeded(oldState: oldState, newState: $0) - if result != 0 { - // Rollback state on failure so it stays consistent with WebRTC's rollback. - $0 = oldState - } - return result + let requirement = SessionRequirement(isPlayoutEnabled: isPlayoutEnabled, isRecordingEnabled: isRecordingEnabled) + do { + try set(requirement: requirement, for: sessionRequirementId) + } catch { + return kAudioEngineErrorFailedToConfigureAudioSession } - guard result == 0 else { return result } return nextResult } } +extension AudioSessionEngineObserver.State { + var isPlayoutEnabled: Bool { sessionRequirements.values.contains(where: \.isPlayoutEnabled) } + var isRecordingEnabled: Bool { sessionRequirements.values.contains(where: \.isRecordingEnabled) } +} + #endif diff --git a/Sources/LiveKit/Audio/Manager/AudioManager.swift b/Sources/LiveKit/Audio/Manager/AudioManager.swift index aa4c87e7b..eb1532916 100644 --- a/Sources/LiveKit/Audio/Manager/AudioManager.swift +++ b/Sources/LiveKit/Audio/Manager/AudioManager.swift @@ -22,6 +22,81 @@ import Combine internal import LiveKitWebRTC +/// Represents an audio session requirement from a specific component. +/// +/// Multiple components can independently register their requirements. On platforms that use +/// `AVAudioSession`, the session stays active as long as any component requires playout or recording. +public struct SessionRequirement: OptionSet, Sendable { + public let rawValue: UInt8 + + public static let playout = Self(rawValue: 1 << 0) + public static let recording = Self(rawValue: 1 << 1) + + public static let none: Self = [] + public static let playbackOnly: Self = [.playout] + public static let recordingOnly: Self = [.recording] + public static let playbackAndRecording: Self = [.playout, .recording] + + public init(rawValue: UInt8) { + self.rawValue = rawValue + } + + public init(isPlayoutEnabled: Bool = false, isRecordingEnabled: Bool = false) { + var rawValue: UInt8 = 0 + if isPlayoutEnabled { + rawValue |= Self.playout.rawValue + } + if isRecordingEnabled { + rawValue |= Self.recording.rawValue + } + self.init(rawValue: rawValue) + } + + public var isPlayoutEnabled: Bool { + contains(.playout) + } + + public var isRecordingEnabled: Bool { + contains(.recording) + } +} + +/// Opaque handle for an acquired audio session requirement. +/// +/// Call ``release()`` when the requirement is no longer needed. +/// If not released explicitly, the requirement is released automatically on deinit. +public final class SessionRequirementHandle: @unchecked Sendable { + private struct State { + var releaseImpl: (@Sendable () throws -> Void)? + } + + private let _state: StateSync + + init(releaseImpl: @escaping @Sendable () throws -> Void) { + _state = StateSync(State(releaseImpl: releaseImpl)) + } + + deinit { + try? releaseIfNeeded() + } + + /// Releases the associated audio session requirement. + /// + /// Releasing the same handle multiple times is a no-op. + public func release() throws { + try releaseIfNeeded() + } + + private func releaseIfNeeded() throws { + let releaseImpl = _state.mutate { state -> (@Sendable () throws -> Void)? in + let releaseImpl = state.releaseImpl + state.releaseImpl = nil + return releaseImpl + } + try releaseImpl?() + } +} + // Audio Session Configuration related public class AudioManager: Loggable { // MARK: - Public @@ -375,6 +450,17 @@ public class AudioManager: Loggable { RTC.audioDeviceModule.isEngineRunning } + /// Acquires an audio session requirement for external ownership. + /// + /// On platforms without `AVAudioSession`, this returns a no-op handle. + public func acquireSessionRequirement(_ requirement: SessionRequirement) throws -> SessionRequirementHandle { + #if os(iOS) || os(visionOS) || os(tvOS) + try audioSession.acquire(requirement: requirement) + #else + SessionRequirementHandle(releaseImpl: {}) + #endif + } + /// The mute state of internal audio engine which uses Voice Processing I/O mute API ``AVAudioInputNode.isVoiceProcessingInputMuted``. /// Normally, you do not need to set this manually since it will be handled automatically. public var isMicrophoneMuted: Bool { diff --git a/Sources/LiveKit/Audio/MixerEngineObserver.swift b/Sources/LiveKit/Audio/MixerEngineObserver.swift index 11cc3cbff..5748895ef 100644 --- a/Sources/LiveKit/Audio/MixerEngineObserver.swift +++ b/Sources/LiveKit/Audio/MixerEngineObserver.swift @@ -47,6 +47,12 @@ public final class MixerEngineObserver: AudioEngineObserver, Loggable { set { _state.mutate { $0.micMixerNode.outputVolume = newValue } } } + /// Adjust the volume of sound player audio sent to remote participants. Range is 0.0 ~ 1.0. + public var soundPlayerVolume: Float { + get { _state.read { $0.soundPlayerNodes.mixerNode.outputVolume } } + set { _state.mutate { $0.soundPlayerNodes.mixerNode.outputVolume = newValue } } + } + // MARK: - Internal var appAudioNode: AVAudioPlayerNode { @@ -60,14 +66,17 @@ public final class MixerEngineObserver: AudioEngineObserver, Loggable { struct State { var next: (any AudioEngineObserver)? - // AppAudio + // App audio (Input) let appNode = AVAudioPlayerNode() let appMixerNode = AVAudioMixerNode() - // Not connected for device rendering mode. + // Mic audio (Input), not connected for device rendering mode. let micNode = AVAudioPlayerNode() let micMixerNode = AVAudioMixerNode() + // Sound player audio (Input) — for sending sounds to remote participants via WebRTC + let soundPlayerNodes = AVAudioPlayerNodePool() + // Reference to mainMixerNode weak var mainMixerNode: AVAudioMixerNode? var outputVolume: Float = 1.0 @@ -92,8 +101,8 @@ public final class MixerEngineObserver: AudioEngineObserver, Loggable { public func engineDidCreate(_ engine: AVAudioEngine) -> Int { log("isManualRenderingMode: \(engine.isInManualRenderingMode)") - let (appNode, appMixerNode, micNode, micMixerNode) = _state.read { - ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode) + let (appNode, appMixerNode, micNode, micMixerNode, soundPlayerNodes) = _state.read { + ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode, $0.soundPlayerNodes) } // Match the outputNode's maximumFramesToRender so our nodes can handle the same @@ -114,18 +123,21 @@ public final class MixerEngineObserver: AudioEngineObserver, Loggable { "appNode=\(appNode.auAudioUnit.maximumFramesToRender), " + "appMixerNode=\(appMixerNode.auAudioUnit.maximumFramesToRender), " + "micNode=\(micNode.auAudioUnit.maximumFramesToRender), " + - "micMixerNode=\(micMixerNode.auAudioUnit.maximumFramesToRender)", .debug) + "micMixerNode=\(micMixerNode.auAudioUnit.maximumFramesToRender), " + + "soundPlayerMixerNode=\(soundPlayerNodes.mixerNode.auAudioUnit.maximumFramesToRender)", .debug) appNode.auAudioUnit.maximumFramesToRender = maxFrames appMixerNode.auAudioUnit.maximumFramesToRender = maxFrames micNode.auAudioUnit.maximumFramesToRender = maxFrames micMixerNode.auAudioUnit.maximumFramesToRender = maxFrames + soundPlayerNodes.setMaximumFramesToRender(maxFrames) log("maximumFramesToRender setting to \(maxFrames): " + "appNode=\(appNode.auAudioUnit.maximumFramesToRender), " + "appMixerNode=\(appMixerNode.auAudioUnit.maximumFramesToRender), " + "micNode=\(micNode.auAudioUnit.maximumFramesToRender), " + - "micMixerNode=\(micMixerNode.auAudioUnit.maximumFramesToRender)", .debug) + "micMixerNode=\(micMixerNode.auAudioUnit.maximumFramesToRender), " + + "soundPlayerMixerNode=\(soundPlayerNodes.mixerNode.auAudioUnit.maximumFramesToRender)", .debug) #if os(iOS) || os(visionOS) || os(tvOS) let config = LKRTCAudioSessionConfiguration.webRTC() @@ -136,6 +148,7 @@ public final class MixerEngineObserver: AudioEngineObserver, Loggable { engine.attach(appMixerNode) engine.attach(micNode) engine.attach(micMixerNode) + engine.attach(soundPlayerNodes) // Invoke next return next?.engineDidCreate(engine) ?? 0 @@ -146,14 +159,15 @@ public final class MixerEngineObserver: AudioEngineObserver, Loggable { // Invoke next let nextResult = next?.engineWillRelease(engine) - let (appNode, appMixerNode, micNode, micMixerNode) = _state.read { - ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode) + let (appNode, appMixerNode, micNode, micMixerNode, soundPlayerNodes) = _state.read { + ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode, $0.soundPlayerNodes) } engine.detach(appNode) engine.detach(appMixerNode) engine.detach(micNode) engine.detach(micMixerNode) + engine.detach(soundPlayerNodes) return nextResult ?? 0 } @@ -169,15 +183,19 @@ public final class MixerEngineObserver: AudioEngineObserver, Loggable { } // Read nodes from state lock. - let (appNode, appMixerNode, micNode, micMixerNode) = _state.read { - ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode) + let (appNode, appMixerNode, micNode, micMixerNode, soundPlayerNodes) = _state.read { + ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode, $0.soundPlayerNodes) } // AVAudioPlayerNode doesn't support Int16 so we ensure to use Float32 - let playerNodeFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, - sampleRate: format.sampleRate, - channels: format.channelCount, - interleaved: format.isInterleaved)! + guard let playerNodeFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, + sampleRate: format.sampleRate, + channels: format.channelCount, + interleaved: format.isInterleaved) + else { + log("Failed to create player node format", .error) + return next?.engineWillConnectInput(engine, src: src, dst: dst, format: format, context: context) ?? 0 + } log("Connecting app -> appMixer -> mainMixer") // appAudio -> appAudioMixer -> mainMixer @@ -197,6 +215,10 @@ public final class MixerEngineObserver: AudioEngineObserver, Loggable { // Always connect micMixer to mainMixer engine.connect(micMixerNode, to: mainMixerNode, format: format) + log("Connecting soundPlayerNodes -> mainMixer") + // soundPlayerNodes -> mainMixer -> WebRTC (remote) + engine.connect(soundPlayerNodes, to: mainMixerNode, format: format, playerNodeFormat: playerNodeFormat) + _state.mutate { if let previousEngineFormat = $0.playerNodeFormat, previousEngineFormat != format { // Clear cached converters when engine format changes @@ -225,12 +247,13 @@ public final class MixerEngineObserver: AudioEngineObserver, Loggable { public func engineWillStart(_ engine: AVAudioEngine, isPlayoutEnabled: Bool, isRecordingEnabled: Bool) -> Int { log("isPlayoutEnabled: \(isPlayoutEnabled), isRecordingEnabled: \(isRecordingEnabled)") - let (micNode, appNode) = _state.read { - ($0.micNode, $0.appNode) + let (micNode, appNode, soundPlayerNodes) = _state.read { + ($0.micNode, $0.appNode, $0.soundPlayerNodes) } micNode.reset() appNode.reset() + soundPlayerNodes.reset() return next?.engineWillStart(engine, isPlayoutEnabled: isPlayoutEnabled, isRecordingEnabled: isRecordingEnabled) ?? 0 } @@ -267,6 +290,49 @@ extension MixerEngineObserver { } } + /// Play a sound buffer through the input path for remote participants via WebRTC. + @discardableResult + func playSound(_ inputBuffer: AVAudioPCMBuffer, loop: Bool = false) -> SoundPlayback? { + let (isInputConnected, soundPlayerNodes, playerNodeFormat) = _state.read { + ($0.isInputConnected, $0.soundPlayerNodes, $0.playerNodeFormat) + } + + guard isInputConnected else { + log("Remote sound playback skipped because the microphone is not published. Publish the microphone to send SoundPlayer audio to remote participants.", .warning) + return nil + } + + guard let playerNodeFormat, let engine = soundPlayerNodes.engine, engine.isRunning else { + log("Remote sound playback skipped because the remote sound path is temporarily unavailable.", .warning) + return nil + } + + // Convert buffer to engine format with a properly-sized converter. + let bufferToSchedule: AVAudioPCMBuffer + if inputBuffer.format != playerNodeFormat { + let outputBufferCapacity = AudioConverter.frameCapacity(from: inputBuffer.format, + to: playerNodeFormat, + inputFrameCount: inputBuffer.frameLength) + guard let converter = AudioConverter(from: inputBuffer.format, + to: playerNodeFormat, + outputBufferCapacity: outputBufferCapacity) + else { + log("Failed to create converter for sound buffer, skipping remote sound playback", .debug) + return nil + } + bufferToSchedule = converter.convert(from: inputBuffer) + } else { + bufferToSchedule = inputBuffer + } + + do { + return try soundPlayerNodes.play(bufferToSchedule, loop: loop) + } catch { + log("Failed to play sound remotely: \(error)", .debug) + return nil + } + } + // Capture appAudio and apply conversion automatically suitable for internal audio engine. public func capture(appAudio inputBuffer: AVAudioPCMBuffer) { guard let converter = converter(for: inputBuffer.format) else { @@ -276,11 +342,11 @@ extension MixerEngineObserver { let buffer = converter.convert(from: inputBuffer) - let (isConnected, appNode) = _state.read { + let (isInputConnected, appNode) = _state.read { ($0.isInputConnected, $0.appNode) } - guard isConnected, let engine = appNode.engine, engine.isRunning else { + guard isInputConnected, let engine = appNode.engine, engine.isRunning else { log("Engine is not running", .warning) return } diff --git a/Sources/LiveKit/Audio/PlayerNodePool.swift b/Sources/LiveKit/Audio/PlayerNodePool.swift new file mode 100644 index 000000000..8511ecc3a --- /dev/null +++ b/Sources/LiveKit/Audio/PlayerNodePool.swift @@ -0,0 +1,227 @@ +/* + * Copyright 2026 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@preconcurrency import AVFAudio + +/// Represents an active sound playback that can be stopped. +protocol SoundPlayback: AnyObject, Sendable { + var isPlaying: Bool { get } + func stop() async +} + +/// Manages a pool of AVAudioPlayerNodes for concurrent audio playback. +class AVAudioPlayerNodePool: @unchecked Sendable, Loggable { + let poolSize: Int + let mixerNode = AVAudioMixerNode() + + var playerNodes: [AVAudioPlayerNode] { + executionQueue.sync { items.map(\.node) } + } + + var engine: AVAudioEngine? { + mixerNode.engine + } + + private enum NodeState { + case idle + case inUse + case stopping + } + + private struct NodeItem { + let node: AVAudioPlayerNode + var state: NodeState = .idle + var generation: UInt64 = 0 + } + + private let executionQueue = DispatchQueue(label: "audio.playerNodePool.queue") + private let stopQueue = DispatchQueue(label: "audio.playerNodePool.stop", qos: .default) + private var items: [NodeItem] + + init(poolSize: Int = 10) { + self.poolSize = poolSize + items = (0 ..< poolSize).map { _ in NodeItem(node: AVAudioPlayerNode()) } + } + + private struct AcquiredNode { + let index: Int + let node: AVAudioPlayerNode + let generation: UInt64 + } + + @discardableResult + func play(_ buffer: AVAudioPCMBuffer, loop: Bool = false) throws -> SoundPlayback { + let acquired = try executionQueue.sync { () throws -> AcquiredNode in + guard let index = items.firstIndex(where: { $0.state == .idle }) else { + throw LiveKitError(.audioEngine, message: "No available player nodes") + } + + items[index].state = .inUse + items[index].generation &+= 1 + + let node = items[index].node + let generation = items[index].generation + node.volume = 1.0 + node.pan = 0.0 + + if loop { + node.scheduleBuffer(buffer, at: nil, options: .loops) + } else { + node.scheduleBuffer(buffer, completionCallbackType: .dataPlayedBack) { [weak self] _ in + self?.executionQueue.async { [weak self] in + self?.releaseCompletedSlot(index: index, generation: generation) + } + } + } + + node.play() + + return AcquiredNode(index: index, node: node, generation: generation) + } + + return NodePlayback(node: acquired.node) { [weak self] in + guard let self else { return } + await beginStoppingSlot(index: acquired.index, generation: acquired.generation) + } + } + + func stop() { + executionQueue.sync { + for index in items.indices { + let node = items[index].node + node.stop() + items[index].state = .idle + items[index].generation &+= 1 + } + } + } + + func reset() { + executionQueue.sync { + for index in items.indices { + let node = items[index].node + node.reset() + items[index].state = .idle + items[index].generation &+= 1 + } + } + } + + func setMaximumFramesToRender(_ maxFrames: AUAudioFrameCount) { + executionQueue.sync { + mixerNode.auAudioUnit.maximumFramesToRender = maxFrames + for item in items { + item.node.auAudioUnit.maximumFramesToRender = maxFrames + } + } + } + + private func releaseCompletedSlot(index: Int, generation: UInt64) { + guard items[index].generation == generation else { return } + items[index].state = .idle + } + + private func beginStoppingSlot(index: Int, generation: UInt64) async { + let node: AVAudioPlayerNode? = await withCheckedContinuation { (continuation: CheckedContinuation) in + executionQueue.async { [self] in + guard items[index].generation == generation else { + continuation.resume(returning: nil) + return + } + + items[index].state = .stopping + items[index].generation &+= 1 + continuation.resume(returning: items[index].node) + } + } + + guard let node else { return } + + await withCheckedContinuation { (continuation: CheckedContinuation) in + stopQueue.async { [weak self] in + node.stop() + guard let self else { + continuation.resume() + return + } + executionQueue.async { [weak self] in + guard let pool = self else { + continuation.resume() + return + } + pool.finishStoppingSlot(index: index) + continuation.resume() + } + } + } + } + + private func finishStoppingSlot(index: Int) { + guard items.indices.contains(index) else { return } + items[index].state = .idle + } +} + +// MARK: - NodePlayback + +class NodePlayback: SoundPlayback, @unchecked Sendable { + private weak var node: AVAudioPlayerNode? + private let onStop: @Sendable () async -> Void + + var isPlaying: Bool { node?.isPlaying ?? false } + + init(node: AVAudioPlayerNode, onStop: @escaping @Sendable () async -> Void) { + self.node = node + self.onStop = onStop + } + + func stop() async { + await onStop() + } +} + +// MARK: - AVAudioEngine extensions + +extension AVAudioEngine { + func attach(_ playerNodePool: AVAudioPlayerNodePool) { + for playerNode in playerNodePool.playerNodes { + attach(playerNode) + } + attach(playerNodePool.mixerNode) + } + + func disconnect(_ playerNodePool: AVAudioPlayerNodePool) { + for playerNode in playerNodePool.playerNodes { + disconnectNodeOutput(playerNode) + } + disconnectNodeOutput(playerNodePool.mixerNode) + } + + func detach(_ playerNodePool: AVAudioPlayerNodePool) { + for playerNode in playerNodePool.playerNodes { + playerNode.stop() + detach(playerNode) + } + detach(playerNodePool.mixerNode) + } + + func connect(_ playerNodePool: AVAudioPlayerNodePool, to node2: AVAudioNode, format: AVAudioFormat?, playerNodeFormat: AVAudioFormat?) { + for playerNode in playerNodePool.playerNodes { + connect(playerNode, to: playerNodePool.mixerNode, format: playerNodeFormat ?? format) + } + connect(playerNodePool.mixerNode, to: node2, format: format) + } +} diff --git a/Sources/LiveKit/Audio/SoundPlayer+Types.swift b/Sources/LiveKit/Audio/SoundPlayer+Types.swift new file mode 100644 index 000000000..3eb193320 --- /dev/null +++ b/Sources/LiveKit/Audio/SoundPlayer+Types.swift @@ -0,0 +1,192 @@ +/* + * Copyright 2026 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@preconcurrency import AVFAudio +import Foundation + +@globalActor +public actor SoundPlayerActor { + public static let shared = SoundPlayerActor() + + private init() {} +} + +/// Options for controlling sound playback behavior. +public struct SoundPlaybackOptions: Sendable { + /// How to handle existing playback of the same sound. + public enum Mode: Sendable { + /// Play concurrently with any existing playback of the same sound. + case concurrent + /// Stop any existing playback of the same sound before playing. + /// + /// Replacement is scoped by prepared sound, not by destination. Existing + /// local and remote playback for the same handle are both stopped before + /// the new playback starts. + case replace + } + + /// Where the sound should be played. + public enum Destination: Sendable { + /// Play locally only (through device speakers). + case local + /// Play for remote participants only (through WebRTC). + /// + /// Remote playback is best-effort. If the WebRTC mixer input path is unavailable + /// (for example, no active remote-routing path is connected), playback is skipped. + case remote + /// Play both locally and for remote participants. + /// + /// Remote playback is best-effort and may be skipped when the WebRTC mixer input path + /// is unavailable. + case localAndRemote + + var includesLocal: Bool { + self == .local || self == .localAndRemote + } + + var includesRemote: Bool { + self == .remote || self == .localAndRemote + } + } + + public var mode: Mode + public var loop: Bool + public var destination: Destination + + public init(mode: Mode = .concurrent, loop: Bool = false, destination: Destination = .localAndRemote) { + self.mode = mode + self.loop = loop + self.destination = destination + } +} + +/// Typed reference to a prepared sound managed by ``SoundPlayer``. +/// +/// `SoundHandle` is a value type, so it is safe to store in SwiftUI state, pass through +/// view models, and copy. It does not own the underlying sound resource; call ``release()`` +/// when the prepared sound is no longer needed. +/// +/// Use ``SoundPlayer/prepare(fileURL:named:)`` to create a handle. If a sound was prepared +/// with a name, use ``SoundPlayer/sound(named:)`` to look up the current handle for that name. +public struct SoundHandle: Hashable, Sendable { + let id: UUID + + /// Plays this prepared sound with the provided options. + public func play(options: SoundPlaybackOptions = SoundPlaybackOptions()) async throws { + try await SoundPlayer.shared.play(self, options: options) + } + + /// Stops active local and/or remote playback for this prepared sound. + public func stop(destination: SoundPlaybackOptions.Destination = .localAndRemote) async { + await SoundPlayer.shared.stop(self, destination: destination) + } + + /// Releases this prepared sound and its audio session requirement. + /// + /// Other copies of the same handle become invalid after release. + public func release() async { + await SoundPlayer.shared.release(self) + } + + /// Returns `true` if this handle still refers to a prepared sound. + public var isPrepared: Bool { + get async { + await SoundPlayer.shared.isPrepared(self) + } + } + + /// Returns `true` if this prepared sound has active playback for the selected destination. + public func isPlaying(destination: SoundPlaybackOptions.Destination = .localAndRemote) async -> Bool { + await SoundPlayer.shared.isPlaying(self, destination: destination) + } +} + +@SoundPlayerActor +class PreparedSound { + let name: String? + let sourceBuffer: AVAudioPCMBuffer + let sessionRequirementHandle: SessionRequirementHandle + var cachedLocalBuffer: AVAudioPCMBuffer? + var cachedLocalBufferFormat: AVAudioFormat? + var local: [SoundPlayback] = [] + var remote: [SoundPlayback] = [] + + init(name: String?, sourceBuffer: AVAudioPCMBuffer, sessionRequirementHandle: SessionRequirementHandle) { + self.name = name + self.sourceBuffer = sourceBuffer + self.sessionRequirementHandle = sessionRequirementHandle + } + + func cleanUp() { + local.removeAll { !$0.isPlaying } + remote.removeAll { !$0.isPlaying } + } + + func stop(destination: SoundPlaybackOptions.Destination) async { + if destination.includesLocal { + for playback in local { + await playback.stop() + } + } + if destination.includesRemote { + for playback in remote { + await playback.stop() + } + } + cleanUp() + } + + func localBuffer(for playerNodeFormat: AVAudioFormat) throws -> AVAudioPCMBuffer { + if let cachedLocalBuffer, let cachedLocalBufferFormat, cachedLocalBufferFormat == playerNodeFormat { + return cachedLocalBuffer + } + + let localBuffer: AVAudioPCMBuffer + if sourceBuffer.format == playerNodeFormat { + localBuffer = sourceBuffer + } else { + let outputBufferCapacity = AudioConverter.frameCapacity(from: sourceBuffer.format, + to: playerNodeFormat, + inputFrameCount: sourceBuffer.frameLength) + guard let converter = AudioConverter(from: sourceBuffer.format, + to: playerNodeFormat, + outputBufferCapacity: outputBufferCapacity) + else { + throw LiveKitError(.soundPlayer, message: "Failed to create audio converter") + } + localBuffer = converter.convert(from: sourceBuffer) + } + + cachedLocalBuffer = localBuffer + cachedLocalBufferFormat = playerNodeFormat + return localBuffer + } +} + +struct LocalEngineState { + var connectedOutputFormat: AVAudioFormat? + var playerNodeFormat: AVAudioFormat? + var needsReconnect = false + + init(connectedOutputFormat: AVAudioFormat? = nil, + playerNodeFormat: AVAudioFormat? = nil, + needsReconnect: Bool = false) + { + self.connectedOutputFormat = connectedOutputFormat + self.playerNodeFormat = playerNodeFormat + self.needsReconnect = needsReconnect + } +} diff --git a/Sources/LiveKit/Audio/SoundPlayer.swift b/Sources/LiveKit/Audio/SoundPlayer.swift new file mode 100644 index 000000000..1f6733a5e --- /dev/null +++ b/Sources/LiveKit/Audio/SoundPlayer.swift @@ -0,0 +1,291 @@ +/* + * Copyright 2026 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@preconcurrency import AVFAudio + +/// High-level API for preparing and playing short sounds locally and over the room mixer. +/// +/// ```swift +/// let click = try await SoundPlayer.shared.prepare(fileURL: clickFileURL, named: "click") +/// try await click.play() +/// await click.release() +/// ``` +/// +/// Prepared sounds must come from local file URLs and use a format readable by `AVAudioFile`. +/// Recommended lifecycle: prepare once, play as needed, then release. +/// Preparing a sound acquires a playback session requirement and may start the local engine early. +/// Local playback uses a fixed internal player-node pool and throws if the pool is exhausted. +/// Remote playback is best-effort and typically requires the microphone to be published. +@SoundPlayerActor +public final class SoundPlayer: Loggable { + // MARK: - Public + + /// Shared sound player instance. + public static let shared = SoundPlayer() + + // MARK: - Private + + let engine = AVAudioEngine() + let playerNodePool: AVAudioPlayerNodePool + let notificationCenter: NotificationCenter + var engineConfigurationObserver: NSObjectProtocol? + + var sounds: [UUID: PreparedSound] = [:] + var soundIdsByName: [String: UUID] = [:] + var localEngineState = LocalEngineState() + + init(poolSize: Int = 10, notificationCenter: NotificationCenter = .default) { + self.notificationCenter = notificationCenter + playerNodePool = AVAudioPlayerNodePool(poolSize: poolSize) + engine.attach(playerNodePool) + engineConfigurationObserver = notificationCenter.addObserver(forName: .AVAudioEngineConfigurationChange, + object: engine, + queue: nil) + { [weak self] _ in + guard let self else { return } + Task { + await self.handleEngineConfigurationChange() + } + } + } + + deinit { + if let engineConfigurationObserver { + notificationCenter.removeObserver(engineConfigurationObserver) + } + } + + /// Decodes and caches audio, returning a handle for playback and release. + /// + /// Preparing a sound also acquires a playback session requirement and starts the + /// local engine early to reduce first-play latency. + /// + /// The returned ``SoundHandle`` is a lightweight value token. `SoundPlayer` owns the + /// prepared sound until the handle is released with ``SoundHandle/release()``. + /// + /// If `name` is provided and another prepared sound already uses the same name, + /// the previous sound is stopped, released, and replaced. Use ``sound(named:)`` to look up + /// the current handle for a name. + /// + /// - Note: Only local file URLs are supported. + /// - Note: The file format must be readable by `AVAudioFile` on the current platform. + /// - Note: Repeated playback of the same short clip should generally reuse a prepared sound + /// instead of decoding from disk each time. + @discardableResult + public func prepare(fileURL: URL, named name: String? = nil) async throws -> SoundHandle { + let readBuffer = try await Self.decodeBuffer(from: fileURL) + let sessionRequirementHandle = try AudioManager.shared.acquireSessionRequirement(.playbackOnly) + let soundId = UUID() + let replacedSoundId = name.flatMap { soundIdsByName[$0] } + + do { + try startEngineIfNeeded() + sounds[soundId] = PreparedSound(name: name, + sourceBuffer: readBuffer, + sessionRequirementHandle: sessionRequirementHandle) + if let name { + soundIdsByName[name] = soundId + } + + if let replacedSoundId { + await releaseSound(id: replacedSoundId) + } + + return SoundHandle(id: soundId) + } catch { + try? sessionRequirementHandle.release() + throw error + } + } + + /// Returns the current handle for a prepared sound associated with `name`, if any. + /// + /// Names are optional aliases. Preparing another sound with the same name replaces the + /// previous mapping, so this returns the latest prepared sound for that name. + public func sound(named name: String) -> SoundHandle? { + guard let soundId = soundIdsByName[name], sounds[soundId] != nil else { return nil } + return SoundHandle(id: soundId) + } + + /// Stops all playing or queued sounds without releasing prepared audio buffers. + public func stopAll(destination: SoundPlaybackOptions.Destination = .localAndRemote) async { + for sound in sounds.values { + await sound.stop(destination: destination) + } + } +} + +extension SoundPlayer { + var outputFormat: AVAudioFormat? { + let format = engine.outputNode.outputFormat(forBus: 0) + guard format.channelCount > 0 else { return nil } + return format + } + + func makePlayerNodeFormat(for outputFormat: AVAudioFormat) throws -> AVAudioFormat { + guard let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, + sampleRate: outputFormat.sampleRate, + channels: outputFormat.channelCount, + interleaved: outputFormat.isInterleaved) + else { + throw LiveKitError(.soundPlayer, message: "Failed to create player node format") + } + return format + } + + func resetLocalEngineState(needsReconnect: Bool) { + localEngineState = LocalEngineState(needsReconnect: needsReconnect) + } + + func invalidateCachedLocalBuffers() { + for sound in sounds.values { + sound.cachedLocalBuffer = nil + sound.cachedLocalBufferFormat = nil + sound.local.removeAll() + } + } + + func invalidateLocalState() { + resetLocalEngineState(needsReconnect: true) + playerNodePool.reset() + invalidateCachedLocalBuffers() + } + + func handleEngineConfigurationChange() { + invalidateLocalState() + } + + func reconnectEngine(outputFormat: AVAudioFormat, playerNodeFormat: AVAudioFormat) throws { + playerNodePool.stop() + engine.stop() + engine.disconnect(playerNodePool) + playerNodePool.setMaximumFramesToRender(engine.outputNode.auAudioUnit.maximumFramesToRender) + engine.connect(playerNodePool, to: engine.mainMixerNode, + format: outputFormat, playerNodeFormat: playerNodeFormat) + try engine.start() + localEngineState.connectedOutputFormat = outputFormat + localEngineState.playerNodeFormat = playerNodeFormat + localEngineState.needsReconnect = false + } + + @discardableResult + func startEngineIfNeeded() throws -> AVAudioFormat { + guard let outputFormat else { + throw LiveKitError(.soundPlayer, message: "Invalid output format") + } + let playerNodeFormat = try makePlayerNodeFormat(for: outputFormat) + let needsReconnect = localEngineState.needsReconnect + || !engine.isRunning + || localEngineState.connectedOutputFormat != outputFormat + || localEngineState.playerNodeFormat != playerNodeFormat + + if needsReconnect { + try reconnectEngine(outputFormat: outputFormat, playerNodeFormat: playerNodeFormat) + } + + return playerNodeFormat + } + + func stopEngine() { + resetLocalEngineState(needsReconnect: false) + playerNodePool.stop() + engine.stop() + } + + func releaseSound(id soundId: UUID) async { + guard let sound = sounds.removeValue(forKey: soundId) else { return } + + if let name = sound.name, soundIdsByName[name] == soundId { + soundIdsByName.removeValue(forKey: name) + } + + await sound.stop(destination: .localAndRemote) + if sounds.isEmpty { + stopEngine() + } + + try? sound.sessionRequirementHandle.release() + } + + func release(_ sound: SoundHandle) async { + await releaseSound(id: sound.id) + } + + func isPrepared(_ sound: SoundHandle) -> Bool { + sounds[sound.id] != nil + } + + func isPlaying(_ sound: SoundHandle, destination: SoundPlaybackOptions.Destination = .localAndRemote) -> Bool { + guard let sound = sounds[sound.id] else { return false } + switch destination { + case .local: + return sound.local.contains(where: \.isPlaying) + case .remote: + return sound.remote.contains(where: \.isPlaying) + case .localAndRemote: + return sound.local.contains(where: \.isPlaying) || sound.remote.contains(where: \.isPlaying) + } + } + + func stop(_ sound: SoundHandle, destination: SoundPlaybackOptions.Destination = .localAndRemote) async { + guard let soundState = sounds[sound.id] else { return } + await soundState.stop(destination: destination) + } + + func play(_ sound: SoundHandle, options: SoundPlaybackOptions = SoundPlaybackOptions()) async throws { + guard let soundState = sounds[sound.id] else { + throw LiveKitError(.soundPlayer, message: "Sound not prepared") + } + + if options.mode == .replace { + await soundState.stop(destination: .localAndRemote) + guard sounds[sound.id] != nil else { + throw LiveKitError(.soundPlayer, message: "Sound not prepared") + } + } + + soundState.cleanUp() + + if options.destination.includesLocal { + let playerNodeFormat = try startEngineIfNeeded() + let bufferToSchedule = try soundState.localBuffer(for: playerNodeFormat) + try soundState.local.append(playerNodePool.play(bufferToSchedule, loop: options.loop)) + } + + if options.destination.includesRemote { + if let remotePlayback = AudioManager.shared.mixer.playSound(soundState.sourceBuffer, loop: options.loop) { + soundState.remote.append(remotePlayback) + } + } + } + + static func decodeBuffer(from fileURL: URL) async throws -> AVAudioPCMBuffer { + guard fileURL.isFileURL else { + throw LiveKitError(.invalidParameter, message: "Only file URLs are supported") + } + + return try await Task.detached(priority: .userInitiated) { + let audioFile = try AVAudioFile(forReading: fileURL) + guard let readBuffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, + frameCapacity: AVAudioFrameCount(audioFile.length)) + else { + throw LiveKitError(.soundPlayer, message: "Failed to allocate audio buffer") + } + try audioFile.read(into: readBuffer, frameCount: AVAudioFrameCount(audioFile.length)) + return readBuffer + }.value + } +} diff --git a/Sources/LiveKit/Errors.swift b/Sources/LiveKit/Errors.swift index a1d75330b..385931342 100644 --- a/Sources/LiveKit/Errors.swift +++ b/Sources/LiveKit/Errors.swift @@ -57,6 +57,7 @@ public enum LiveKitErrorType: Int, Sendable { // Audio case audioEngine = 801 case audioSession = 802 + case soundPlayer = 803 case codecNotSupported = 901 @@ -118,6 +119,8 @@ extension LiveKitErrorType: CustomStringConvertible { "Audio Engine Error" case .audioSession: "Audio Session Error" + case .soundPlayer: + "Sound Player Error" case .codecNotSupported: "Codec not supported" case .encryptionFailed: diff --git a/Sources/LiveKit/Support/Audio/AudioConverter.swift b/Sources/LiveKit/Support/Audio/AudioConverter.swift index 4323ea8b8..be870dbb6 100644 --- a/Sources/LiveKit/Support/Audio/AudioConverter.swift +++ b/Sources/LiveKit/Support/Audio/AudioConverter.swift @@ -17,6 +17,8 @@ @preconcurrency import AVFAudio final class AudioConverter: Sendable { + static let defaultOutputBufferCapacity: AVAudioFrameCount = 1024 * 10 + let inputFormat: AVAudioFormat let outputFormat: AVAudioFormat @@ -28,10 +30,10 @@ final class AudioConverter: Sendable { let inputSampleRate = inputFormat.sampleRate let outputSampleRate = outputFormat.sampleRate // Compute the output frame capacity based on sample rate ratio - return AVAudioFrameCount(Double(inputFrameCount) * (outputSampleRate / inputSampleRate)) + return AVAudioFrameCount(ceil(Double(inputFrameCount) * (outputSampleRate / inputSampleRate))) } - init?(from inputFormat: AVAudioFormat, to outputFormat: AVAudioFormat, outputBufferCapacity: AVAudioFrameCount = 9600) { + init?(from inputFormat: AVAudioFormat, to outputFormat: AVAudioFormat, outputBufferCapacity: AVAudioFrameCount = AudioConverter.defaultOutputBufferCapacity) { guard let converter = AVAudioConverter(from: inputFormat, to: outputFormat), let buffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputBufferCapacity) else {