From 313ab982f0fe3df42186ad9b104e089fc97107d3 Mon Sep 17 00:00:00 2001
From: Matt Johnson <292243+engineeringstuff@users.noreply.github.com>
Date: Sun, 8 Feb 2026 19:00:07 +0000
Subject: [PATCH] Adds smart selection of correct input channel for
 multi-channel mic

---
 .../delegate/RecorderStreamDelegate.swift     | 228 +++++++++++-------
 1 file changed, 139 insertions(+), 89 deletions(-)

diff --git a/record_macos/macos/record_macos/Sources/record_macos/delegate/RecorderStreamDelegate.swift b/record_macos/macos/record_macos/Sources/record_macos/delegate/RecorderStreamDelegate.swift
index ef06e293..44b94ba3 100644
--- a/record_macos/macos/record_macos/Sources/record_macos/delegate/RecorderStreamDelegate.swift
+++ b/record_macos/macos/record_macos/Sources/record_macos/delegate/RecorderStreamDelegate.swift
@@ -13,6 +13,8 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
   private var audioEncoder: AudioEnc?
   private var outputFormat: AVAudioFormat?
+  private var targetSampleRate: Double = 44100.0
+  private var targetChannels: Int = 1
 
   init(onPause: @escaping () -> (), onStop: @escaping () -> ()) {
     self.onPause = onPause
@@ -39,30 +41,26 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
       try setVoiceProcessing(echoCancel: config.echoCancel, autoGain: config.autoGain, audioEngine: audioEngine)
     }
 
-    let srcFormat = audioEngine.inputNode.inputFormat(forBus: 0)
-
+    targetSampleRate = Double(config.sampleRate)
+    targetChannels = config.numChannels
+
     outputFormat = AVAudioFormat(
       commonFormat: .pcmFormatInt16,
-      sampleRate: Double(config.sampleRate),
-      channels: AVAudioChannelCount(config.numChannels),
+      sampleRate: targetSampleRate,
+      channels: AVAudioChannelCount(targetChannels),
       interleaved: true
     )
 
-    guard let dstFormat = outputFormat else {
+    guard outputFormat != nil else {
       throw RecorderError.error(
         message: "Failed to start recording",
         details: "Format is not supported: \(config.sampleRate)Hz - \(config.numChannels) channels."
       )
     }
-
-    guard let converter = AVAudioConverter(from: srcFormat, to: dstFormat) else {
-      throw RecorderError.error(
-        message: "Failed to start recording",
-        details: "Format conversion is not possible."
-      )
-    }
-    converter.sampleRateConverterQuality = AVAudioQuality.high.rawValue
-
+
+    // Tap with the native source format — this is the only format guaranteed to work
+    let srcFormat = audioEngine.inputNode.inputFormat(forBus: 0)
+
     audioEngine.inputNode.installTap(
       onBus: bus,
       bufferSize: AVAudioFrameCount(config.streamBufferSize ?? 1024),
@@ -70,8 +68,6 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
 
       self.stream(
         buffer: buffer,
-        dstFormat: dstFormat,
-        converter: converter,
         recordEventHandler: recordEventHandler
       )
     }
@@ -140,51 +136,107 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
     }
   }
 
-  private func updateAmplitudeInt16(buffer: AVAudioPCMBuffer) {
-    guard let channelData = buffer.int16ChannelData else {
+  private func updateAmplitudeFloat(buffer: AVAudioPCMBuffer) {
+    guard let floatData = buffer.floatChannelData else {
       return
     }
 
     let frameCount = Int(buffer.frameLength)
-    let firstChannelPointer = channelData[0]
 
-    var maxSample: Float = -160.0
+    var maxSample: Float = 0.0
     for i in 0..<frameCount {
-      let curSample = abs(Float(firstChannelPointer[i]))
+      let curSample = abs(floatData[0][i])
       if curSample > maxSample {
         maxSample = curSample
       }
     }
-    amplitude = 20 * (log(maxSample / 32767.0) / log(10))
+    if maxSample > 0 {
+      amplitude = 20 * (log(maxSample) / log(10))
+    } else {
+      amplitude = -160.0
+    }
   }
 
   private func stream(
     buffer: AVAudioPCMBuffer,
-    dstFormat: AVAudioFormat,
-    converter: AVAudioConverter,
     recordEventHandler: RecordStreamHandler
   ) -> Void {
-    guard let convertedBuffer = convertBuffer(buffer: buffer, dstFormat: dstFormat, converter: converter) else {
-      stop { path in }
+    let frameCount = Int(buffer.frameLength)
+    let channelCount = Int(buffer.format.channelCount)
+    print("[STREAM] frames=\(frameCount), channels=\(channelCount), rate=\(buffer.format.sampleRate)")
+
+    guard frameCount > 0, channelCount > 0 else {
+      print("[STREAM] Empty buffer, skipping")
       return
     }
-
-    updateAmplitudeInt16(buffer: convertedBuffer)
-
-    if config?.encoder == AudioEncoder.aacLc.rawValue {
-      guard let dataList = encodeAac(buffer: convertedBuffer) else {
-        stop { path in }
-        return
+    updateAmplitudeFloat(buffer: buffer)
+
+    // Manual downmix + resample + convert
+    let data = manualConvert(buffer: buffer)
+    print("[STREAM] converted \(data.count) bytes")
+
+    if config?.encoder == AudioEncoder.pcm16bits.rawValue {
+      sendBytes(dataList: [data], recordEventHandler: recordEventHandler)
+    } else if config?.encoder == AudioEncoder.aacLc.rawValue {
+      sendBytes(dataList: [data], recordEventHandler: recordEventHandler)
+    }
+  }
+
+  // Downmix to mono (ch0), resample via linear interpolation, convert float to int16 LE bytes
+  private func manualConvert(buffer: AVAudioPCMBuffer) -> Data {
+    guard let floatData = buffer.floatChannelData else {
+      print("[CONVERT] No float channel data!")
+      return Data()
+    }
+
+    let srcFrameCount = Int(buffer.frameLength)
+    let srcRate = buffer.format.sampleRate
+    let dstRate = targetSampleRate
+
+    // Find the channel with the most audio energy (not always ch0)
+    let channels = Int(buffer.format.channelCount)
+    var bestChannel = 0
+    var bestMax: Float = 0.0
+    for ch in 0..<channels {
+      let p = floatData[ch]
+      var chMax: Float = 0.0
+      for i in 0..<srcFrameCount {
+        let v = abs(p[i])
+        if v > chMax { chMax = v }
       }
-
-      sendBytes(dataList: dataList, recordEventHandler: recordEventHandler)
-    } else if config?.encoder == AudioEncoder.pcm16bits.rawValue {
-      if let data = convertInt16toUInt8(buffer: convertedBuffer) {
-        sendBytes(dataList: [data], recordEventHandler: recordEventHandler)
+      if chMax > bestMax {
+        bestMax = chMax
+        bestChannel = ch
       }
     }
+    let srcChannel = floatData[bestChannel]
+
+    // Calculate output frame count based on sample rate ratio
+    let dstFrameCount = Int(Double(srcFrameCount) * dstRate / srcRate)
+
+    var bytes = Data(capacity: dstFrameCount * targetChannels * 2)
+
+    for i in 0..<dstFrameCount {
+      // Resample by linear interpolation between the two nearest source samples
+      let srcPos = Double(i) * srcRate / dstRate
+      let idx = min(Int(srcPos), srcFrameCount - 1)
+      let nextIdx = min(idx + 1, srcFrameCount - 1)
+      let frac = Float(srcPos - Double(idx))
+      let sample = srcChannel[idx] + (srcChannel[nextIdx] - srcChannel[idx]) * frac
+
+      // Clamp to [-1, 1] and write as little-endian int16
+      let clamped = max(-1.0, min(1.0, sample))
+      let intSample = Int16(clamped * 32767.0)
+      bytes.append(UInt8(UInt16(bitPattern: intSample) & 0x00FF))
+      bytes.append(UInt8((UInt16(bitPattern: intSample) >> 8) & 0x00FF))
+    }
+
+    return bytes
   }
 
   private func sendBytes(dataList: [Data], recordEventHandler: RecordStreamHandler) {
@@ -198,74 +250,72 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
     }
   }
 
-  private func convertBuffer(
-    buffer: AVAudioPCMBuffer,
-    dstFormat: AVAudioFormat,
-    converter: AVAudioConverter) -> AVAudioPCMBuffer?
-  {
-
-    let bufferToConvert: AVAudioPCMBuffer
-    let converterToUse: AVAudioConverter
-
-    if buffer.format.channelCount > 1 {
-      // Create a mono version of the buffer and a new converter for it
-      guard let monoFormat = AVAudioFormat(
-        commonFormat: .pcmFormatFloat32,
-        sampleRate: buffer.format.sampleRate,
-        channels: 1,
-        interleaved: false
-      ) else {
-        print("Unable to create mono format")
-        return nil
-      }
-
-      guard let monoBuffer = AVAudioPCMBuffer(pcmFormat: monoFormat, frameCapacity: buffer.frameLength) else {
-        print("Unable to create mono buffer")
-        return nil
-      }
+  private func convertFloatToInt16Bytes(buffer: AVAudioPCMBuffer) -> Data {
+    guard let floatData = buffer.floatChannelData else {
+      return Data()
+    }
 
-      monoBuffer.frameLength = buffer.frameLength
+    let frameCount = Int(buffer.frameLength)
+    let channels = Int(buffer.format.channelCount)
 
-      // Copy the first channel
-      if let src = buffer.floatChannelData, let dst = monoBuffer.floatChannelData {
-        memcpy(dst[0], src[0], Int(buffer.frameLength) * MemoryLayout<Float>.size)
+    var bytes = Data(capacity: frameCount * channels * 2)
+    for i in 0..<frameCount {
+      for ch in 0..<channels {
+        let sample = max(-1.0, min(1.0, floatData[ch][i]))
+        let intSample = Int16(sample * 32767.0)
+        bytes.append(UInt8(UInt16(bitPattern: intSample) & 0x00FF))
+        bytes.append(UInt8((UInt16(bitPattern: intSample) >> 8) & 0x00FF))
       }
+    }
 
-      guard let monoConverter = AVAudioConverter(from: monoFormat, to: dstFormat) else {
-        print("Unable to create mono converter")
-        return nil
-      }
-      monoConverter.sampleRateConverterQuality = AVAudioQuality.high.rawValue
+    return bytes
+  }
 
-      bufferToConvert = monoBuffer
-      converterToUse = monoConverter
-    } else {
-      bufferToConvert = buffer
-      converterToUse = converter
+  private func convertFloatToInt16(
+    buffer: AVAudioPCMBuffer,
+    dstFormat: AVAudioFormat) -> AVAudioPCMBuffer? {
+
+    guard let floatData = buffer.floatChannelData else {
+      print("No float channel data in buffer")
+      return nil
     }
 
-    let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
-      outStatus.pointee = .haveData
-      return bufferToConvert
+    // Use non-interleaved format for AVAudioPCMBuffer compatibility
+    guard let niFormat = AVAudioFormat(
+      commonFormat: .pcmFormatInt16,
+      sampleRate: dstFormat.sampleRate,
+      channels: dstFormat.channelCount,
+      interleaved: false
+    ) else {
+      print("Unable to create non-interleaved int16 format")
+      return nil
    }
 
-    // Determine frame capacity
-    let capacity = AVAudioFrameCount(Double(bufferToConvert.frameLength) * dstFormat.sampleRate / bufferToConvert.format.sampleRate)
+    let frameCount = Int(buffer.frameLength)
+    let channels = Int(buffer.format.channelCount)
 
-    // Destination buffer
-    guard let convertedBuffer = AVAudioPCMBuffer(pcmFormat: dstFormat, frameCapacity: capacity) else {
-      print("Unable to create output buffer")
+    guard let int16Buffer = AVAudioPCMBuffer(pcmFormat: niFormat, frameCapacity: buffer.frameLength) else {
+      print("Unable to create int16 output buffer")
       return nil
     }
+    int16Buffer.frameLength = buffer.frameLength
 
-    // Convert input buffer (resample, num channels)
-    var error: NSError? = nil
-    converterToUse.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
-    if error != nil {
-      print("Unable to convert input buffer \(error!)")
+    guard let int16Data = int16Buffer.int16ChannelData else {
+      print("No int16 channel data in output buffer")
      return nil
    }
 
-    return convertedBuffer
+    for ch in 0..<channels {
+      for i in 0..<frameCount {
+        let sample = max(-1.0, min(1.0, floatData[ch][i]))
+        int16Data[ch][i] = Int16(sample * 32767.0)
+      }
+    }
+
+    return int16Buffer
   }
 
   private func encodeAac(buffer: AVAudioPCMBuffer) -> [Data]? {
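
The key idea in manualConvert above is worth isolating: on multi-channel devices (aggregate devices, array mics), channel 0 is often silent while the real signal arrives on another channel, so the patch scans every channel for the loudest peak before downmixing. Below is a minimal standalone sketch of just that selection step; the name pickLoudestChannel is hypothetical and not part of the patch.

import AVFoundation

// Returns the index of the channel with the largest peak sample,
// mirroring the energy scan in manualConvert. On a multi-channel
// input, ch0 may carry silence while another channel has the mic.
func pickLoudestChannel(in buffer: AVAudioPCMBuffer) -> Int {
  guard let floatData = buffer.floatChannelData else { return 0 }
  let frames = Int(buffer.frameLength)
  var best = 0
  var bestMax: Float = 0.0
  for ch in 0..<Int(buffer.format.channelCount) {
    var chMax: Float = 0.0
    for i in 0..<frames {
      chMax = max(chMax, abs(floatData[ch][i]))
    }
    if chMax > bestMax {
      bestMax = chMax
      best = ch
    }
  }
  return best
}

A peak scan is cheap (one pass per buffer) but reacts to transient noise; averaging energy over a few buffers before switching channels would make the choice more stable.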
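The patch also replaces AVAudioConverter with plain linear interpolation followed by int16 little-endian packing. Here is a sketch of the same arithmetic on a raw float array, handy for unit-testing the conversion without an AVAudioEngine; resampleToInt16LE is a hypothetical helper, not part of the patch.

import Foundation

// Linearly resample mono float samples from srcRate to dstRate and
// pack them as little-endian int16, as manualConvert does per buffer.
func resampleToInt16LE(_ src: [Float], srcRate: Double, dstRate: Double) -> Data {
  guard !src.isEmpty, srcRate > 0, dstRate > 0 else { return Data() }
  let dstCount = Int(Double(src.count) * dstRate / srcRate)
  var out = Data(capacity: dstCount * 2)
  for i in 0..<dstCount {
    // Map the output frame back into source time and interpolate
    // between the two nearest source samples.
    let pos = Double(i) * srcRate / dstRate
    let idx = min(Int(pos), src.count - 1)
    let next = min(idx + 1, src.count - 1)
    let frac = Float(pos - Double(idx))
    let sample = src[idx] + (src[next] - src[idx]) * frac
    // Clamp to [-1, 1] before scaling so the Int16 conversion cannot trap.
    let clamped = max(-1.0, min(1.0, sample))
    let value = UInt16(bitPattern: Int16(clamped * 32767.0))
    out.append(UInt8(value & 0x00FF))
    out.append(UInt8((value >> 8) & 0x00FF))
  }
  return out
}

// Halving the rate keeps every other frame: 4 samples in, 2 samples (4 bytes) out.
let bytes = resampleToInt16LE([0.0, 0.5, -0.5, 1.0], srcRate: 8000, dstRate: 4000)
assert(bytes.count == 4)

Linear interpolation introduces some aliasing when downsampling compared to AVAudioConverter's filtered resampler, which is the trade-off this patch accepts in exchange for tapping the input node in its guaranteed native format.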