From 313ab982f0fe3df42186ad9b104e089fc97107d3 Mon Sep 17 00:00:00 2001
From: Matt Johnson <292243+engineeringstuff@users.noreply.github.com>
Date: Sun, 8 Feb 2026 19:00:07 +0000
Subject: [PATCH] Adds smart selection of correct input channel for
 multi-channel mic

---
 .../delegate/RecorderStreamDelegate.swift     | 228 +++++++++++-------
 1 file changed, 139 insertions(+), 89 deletions(-)

diff --git a/record_macos/macos/record_macos/Sources/record_macos/delegate/RecorderStreamDelegate.swift b/record_macos/macos/record_macos/Sources/record_macos/delegate/RecorderStreamDelegate.swift
index ef06e293..44b94ba3 100644
--- a/record_macos/macos/record_macos/Sources/record_macos/delegate/RecorderStreamDelegate.swift
+++ b/record_macos/macos/record_macos/Sources/record_macos/delegate/RecorderStreamDelegate.swift
@@ -13,6 +13,8 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
   private var audioEncoder: AudioEnc?
   private var outputFormat: AVAudioFormat?
+  private var targetSampleRate: Double = 44100.0
+  private var targetChannels: Int = 1
 
   init(onPause: @escaping () -> (), onStop: @escaping () -> ()) {
     self.onPause = onPause
@@ -39,30 +41,26 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
       try setVoiceProcessing(echoCancel: config.echoCancel, autoGain: config.autoGain, audioEngine: audioEngine)
     }
 
-    let srcFormat = audioEngine.inputNode.inputFormat(forBus: 0)
-
+    targetSampleRate = Double(config.sampleRate)
+    targetChannels = config.numChannels
+
     outputFormat = AVAudioFormat(
       commonFormat: .pcmFormatInt16,
-      sampleRate: Double(config.sampleRate),
-      channels: AVAudioChannelCount(config.numChannels),
+      sampleRate: targetSampleRate,
+      channels: AVAudioChannelCount(targetChannels),
       interleaved: true
     )
 
-    guard let dstFormat = outputFormat else {
+    guard outputFormat != nil else {
       throw RecorderError.error(
         message: "Failed to start recording",
         details: "Format is not supported: \(config.sampleRate)Hz - \(config.numChannels) channels."
       )
     }
-
-    guard let converter = AVAudioConverter(from: srcFormat, to: dstFormat) else {
-      throw RecorderError.error(
-        message: "Failed to start recording",
-        details: "Format conversion is not possible."
-      )
-    }
-    converter.sampleRateConverterQuality = AVAudioQuality.high.rawValue
-
+
+    // Tap with the native source format — this is the only format guaranteed to work
+    let srcFormat = audioEngine.inputNode.inputFormat(forBus: 0)
+
     audioEngine.inputNode.installTap(
       onBus: bus,
       bufferSize: AVAudioFrameCount(config.streamBufferSize ?? 1024),
@@ -70,8 +68,6 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
 
       self.stream(
         buffer: buffer,
-        dstFormat: dstFormat,
-        converter: converter,
         recordEventHandler: recordEventHandler
       )
     }
@@ -140,51 +136,107 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
     }
   }
 
-  private func updateAmplitudeInt16(buffer: AVAudioPCMBuffer) {
-    guard let channelData = buffer.int16ChannelData else {
+  private func updateAmplitudeFloat(buffer: AVAudioPCMBuffer) {
+    guard let floatData = buffer.floatChannelData else {
       return
     }
 
     let frameCount = Int(buffer.frameLength)
-    let firstChannelPointer = channelData[0]
 
-    var maxSample: Float = -160.0
+    var maxSample: Float = 0.0
     for i in 0..<frameCount {
-      let curSample = abs(Float(firstChannelPointer[i]))
+      let curSample = abs(floatData[0][i])
       if curSample > maxSample {
         maxSample = curSample
       }
     }
-    amplitude = 20 * (log(maxSample / 32767.0) / log(10))
+    if maxSample > 0 {
+      amplitude = 20 * (log(maxSample) / log(10))
+    } else {
+      amplitude = -160.0
+    }
   }
 
   private func stream(
     buffer: AVAudioPCMBuffer,
-    dstFormat: AVAudioFormat,
-    converter: AVAudioConverter,
     recordEventHandler: RecordStreamHandler
   ) -> Void {
-    guard let convertedBuffer = convertBuffer(buffer: buffer, dstFormat: dstFormat, converter: converter) else {
-      stop { path in }
+    let frameCount = Int(buffer.frameLength)
+    let channelCount = Int(buffer.format.channelCount)
+    print("[STREAM] frames=\(frameCount), channels=\(channelCount), rate=\(buffer.format.sampleRate)")
+
+    guard frameCount > 0, channelCount > 0 else {
+      print("[STREAM] Empty buffer, skipping")
       return
     }
-
-    updateAmplitudeInt16(buffer: convertedBuffer)
-
-    if config?.encoder == AudioEncoder.aacLc.rawValue {
-      guard let dataList = encodeAac(buffer: convertedBuffer) else {
-        stop { path in }
-        return
+    updateAmplitudeFloat(buffer: buffer)
+
+    // Manual downmix + resample + convert
+    let data = manualConvert(buffer: buffer)
+    print("[STREAM] converted \(data.count) bytes")
+
+    if config?.encoder == AudioEncoder.pcm16bits.rawValue {
+      sendBytes(dataList: [data], recordEventHandler: recordEventHandler)
+    } else if config?.encoder == AudioEncoder.aacLc.rawValue {
+      sendBytes(dataList: [data], recordEventHandler: recordEventHandler)
+    }
+  }
+
+  // Downmix to mono (ch0), resample via linear interpolation, convert float to int16 LE bytes
+  private func manualConvert(buffer: AVAudioPCMBuffer) -> Data {
+    guard let floatData = buffer.floatChannelData else {
+      print("[CONVERT] No float channel data!")
+      return Data()
+    }
+
+    let srcFrameCount = Int(buffer.frameLength)
+    let srcRate = buffer.format.sampleRate
+    let dstRate = targetSampleRate
+
+    // Find the channel with the most audio energy (not always ch0)
+    let channels = Int(buffer.format.channelCount)
+    var bestChannel = 0
+    var bestMax: Float = 0.0
+    for ch in 0..<channels {
+      let p = floatData[ch]
+      var chMax: Float = 0.0
+      for i in 0..<srcFrameCount {
+        let v = abs(p[i])
+        if v > chMax { chMax = v }
       }
-
-      sendBytes(dataList: dataList, recordEventHandler: recordEventHandler)
-    } else if config?.encoder == AudioEncoder.pcm16bits.rawValue {
-      if let data = convertInt16toUInt8(buffer: convertedBuffer) {
-        sendBytes(dataList: [data], recordEventHandler: recordEventHandler)
+      if chMax > bestMax {
+        bestMax = chMax
+        bestChannel = ch
       }
     }
+    let srcChannel = floatData[bestChannel]
+
+    // Calculate output frame count based on sample rate ratio
+    let dstFrameCount = Int(Double(srcFrameCount) * dstRate / srcRate)
+
+    var bytes = Data(capacity: dstFrameCount * targetChannels * 2)
+
+    for i in 0..<dstFrameCount {
+      // Resample by linear interpolation between the two nearest source samples
+      let srcPos = Double(i) * srcRate / dstRate
+      let idx = min(Int(srcPos), srcFrameCount - 1)
+      let nextIdx = min(idx + 1, srcFrameCount - 1)
+      let frac = Float(srcPos - Double(idx))
+      let sample = srcChannel[idx] + (srcChannel[nextIdx] - srcChannel[idx]) * frac
+
+      // Clamp to [-1, 1] and write as little-endian int16
+      let clamped = max(-1.0, min(1.0, sample))
+      let intSample = Int16(clamped * 32767.0)
+      bytes.append(UInt8(UInt16(bitPattern: intSample) & 0x00FF))
+      bytes.append(UInt8((UInt16(bitPattern: intSample) >> 8) & 0x00FF))
+    }
+
+    return bytes
   }
 
   private func sendBytes(dataList: [Data], recordEventHandler: RecordStreamHandler) {
@@ -198,74 +250,72 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
     }
   }
 
-  private func convertBuffer(
-    buffer: AVAudioPCMBuffer,
-    dstFormat: AVAudioFormat,
-    converter: AVAudioConverter) -> AVAudioPCMBuffer?
-  {
-
-    let bufferToConvert: AVAudioPCMBuffer
-    let converterToUse: AVAudioConverter
-
-    if buffer.format.channelCount > 1 {
-      // Create a mono version of the buffer and a new converter for it
-      guard let monoFormat = AVAudioFormat(
-        commonFormat: .pcmFormatFloat32,
-        sampleRate: buffer.format.sampleRate,
-        channels: 1,
-        interleaved: false
-      ) else {
-        print("Unable to create mono format")
-        return nil
-      }
-
-      guard let monoBuffer = AVAudioPCMBuffer(pcmFormat: monoFormat, frameCapacity: buffer.frameLength) else {
-        print("Unable to create mono buffer")
-        return nil
-      }
+  private func convertFloatToInt16Bytes(buffer: AVAudioPCMBuffer) -> Data {
+    guard let floatData = buffer.floatChannelData else {
+      return Data()
+    }
 
-      monoBuffer.frameLength = buffer.frameLength
+    let frameCount = Int(buffer.frameLength)
+    let channels = Int(buffer.format.channelCount)
 
-      // Copy the first channel
-      if let src = buffer.floatChannelData, let dst = monoBuffer.floatChannelData {
-        memcpy(dst[0], src[0], Int(buffer.frameLength) * MemoryLayout<Float>.size)
+    var bytes = Data(capacity: frameCount * channels * 2)
+    for i in 0..<frameCount {
+      for ch in 0..<channels {
+        let sample = max(-1.0, min(1.0, floatData[ch][i]))
+        let intSample = Int16(sample * 32767.0)
+        bytes.append(UInt8(UInt16(bitPattern: intSample) & 0x00FF))
+        bytes.append(UInt8((UInt16(bitPattern: intSample) >> 8) & 0x00FF))
       }
+    }
 
-      guard let monoConverter = AVAudioConverter(from: monoFormat, to: dstFormat) else {
-        print("Unable to create mono converter")
-        return nil
-      }
-      monoConverter.sampleRateConverterQuality = AVAudioQuality.high.rawValue
+    return bytes
+  }
 
-      bufferToConvert = monoBuffer
-      converterToUse = monoConverter
-    } else {
-      bufferToConvert = buffer
-      converterToUse = converter
+  private func convertFloatToInt16(
+    buffer: AVAudioPCMBuffer,
+    dstFormat: AVAudioFormat) -> AVAudioPCMBuffer? {
+
+    guard let floatData = buffer.floatChannelData else {
+      print("No float channel data in buffer")
+      return nil
     }
 
-    let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
-      outStatus.pointee = .haveData
-      return bufferToConvert
+    // Use non-interleaved format for AVAudioPCMBuffer compatibility
+    guard let niFormat = AVAudioFormat(
+      commonFormat: .pcmFormatInt16,
+      sampleRate: dstFormat.sampleRate,
+      channels: dstFormat.channelCount,
+      interleaved: false
+    ) else {
+      print("Unable to create non-interleaved int16 format")
+      return nil
    }
 
-    // Determine frame capacity
-    let capacity = AVAudioFrameCount(Double(bufferToConvert.frameLength) * dstFormat.sampleRate / bufferToConvert.format.sampleRate)
+    let frameCount = Int(buffer.frameLength)
+    let channels = Int(buffer.format.channelCount)
 
-    // Destination buffer
-    guard let convertedBuffer = AVAudioPCMBuffer(pcmFormat: dstFormat, frameCapacity: capacity) else {
-      print("Unable to create output buffer")
+    guard let int16Buffer = AVAudioPCMBuffer(pcmFormat: niFormat, frameCapacity: buffer.frameLength) else {
+      print("Unable to create int16 output buffer")
       return nil
     }
+    int16Buffer.frameLength = buffer.frameLength
 
-    // Convert input buffer (resample, num channels)
-    var error: NSError? = nil
-    converterToUse.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
-    if error != nil {
-      print("Unable to convert input buffer \(error!)")
+    guard let int16Data = int16Buffer.int16ChannelData else {
+      print("No int16 channel data in output buffer")
      return nil
    }
 
-    return convertedBuffer
+    for ch in 0..<channels {
+      for i in 0..<frameCount {
+        let sample = max(-1.0, min(1.0, floatData[ch][i]))
+        int16Data[ch][i] = Int16(sample * 32767.0)
+      }
+    }
+
+    return int16Buffer
   }
 
   private func encodeAac(buffer: AVAudioPCMBuffer) -> [Data]? {
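
The key idea in manualConvert above is worth isolating: on multi-channel devices (aggregate devices, array mics), channel 0 is often silent while the real signal arrives on another channel, so the patch scans every channel for the loudest peak before downmixing. Below is a minimal standalone sketch of just that selection step; the name pickLoudestChannel is hypothetical and not part of the patch.

import AVFoundation

// Returns the index of the channel with the largest peak sample,
// mirroring the energy scan in manualConvert. On a multi-channel
// input, ch0 may carry silence while another channel has the mic.
func pickLoudestChannel(in buffer: AVAudioPCMBuffer) -> Int {
  guard let floatData = buffer.floatChannelData else { return 0 }
  let frames = Int(buffer.frameLength)
  var best = 0
  var bestMax: Float = 0.0
  for ch in 0..<Int(buffer.format.channelCount) {
    var chMax: Float = 0.0
    for i in 0..<frames {
      chMax = max(chMax, abs(floatData[ch][i]))
    }
    if chMax > bestMax {
      bestMax = chMax
      best = ch
    }
  }
  return best
}

A peak scan is cheap (one pass per buffer) but reacts to transient noise; averaging energy over a few buffers before switching channels would make the choice more stable.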
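The patch also replaces AVAudioConverter with plain linear interpolation followed by int16 little-endian packing. Here is a sketch of the same arithmetic on a raw float array, handy for unit-testing the conversion without an AVAudioEngine; resampleToInt16LE is a hypothetical helper, not part of the patch.

import Foundation

// Linearly resample mono float samples from srcRate to dstRate and
// pack them as little-endian int16, as manualConvert does per buffer.
func resampleToInt16LE(_ src: [Float], srcRate: Double, dstRate: Double) -> Data {
  guard !src.isEmpty, srcRate > 0, dstRate > 0 else { return Data() }
  let dstCount = Int(Double(src.count) * dstRate / srcRate)
  var out = Data(capacity: dstCount * 2)
  for i in 0..<dstCount {
    // Map the output frame back into source time and interpolate
    // between the two nearest source samples.
    let pos = Double(i) * srcRate / dstRate
    let idx = min(Int(pos), src.count - 1)
    let next = min(idx + 1, src.count - 1)
    let frac = Float(pos - Double(idx))
    let sample = src[idx] + (src[next] - src[idx]) * frac
    // Clamp to [-1, 1] before scaling so the Int16 conversion cannot trap.
    let clamped = max(-1.0, min(1.0, sample))
    let value = UInt16(bitPattern: Int16(clamped * 32767.0))
    out.append(UInt8(value & 0x00FF))
    out.append(UInt8((value >> 8) & 0x00FF))
  }
  return out
}

// Halving the rate keeps every other frame: 4 samples in, 2 samples (4 bytes) out.
let bytes = resampleToInt16LE([0.0, 0.5, -0.5, 1.0], srcRate: 8000, dstRate: 4000)
assert(bytes.count == 4)

Linear interpolation introduces some aliasing when downsampling compared to AVAudioConverter's filtered resampler, which is the trade-off this patch accepts in exchange for tapping the input node in its guaranteed native format.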