@@ -13,6 +13,8 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
 
   private var audioEncoder: AudioEnc?
   private var outputFormat: AVAudioFormat?
+  private var targetSampleRate: Double = 44100.0
+  private var targetChannels: Int = 1
 
   init(onPause: @escaping () -> (), onStop: @escaping () -> ()) {
     self.onPause = onPause
@@ -39,39 +41,33 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
       try setVoiceProcessing(echoCancel: config.echoCancel, autoGain: config.autoGain, audioEngine: audioEngine)
     }
 
-    let srcFormat = audioEngine.inputNode.inputFormat(forBus: 0)
+    targetSampleRate = Double(config.sampleRate)
+    targetChannels = config.numChannels
 
     outputFormat = AVAudioFormat(
       commonFormat: .pcmFormatInt16,
-      sampleRate: Double(config.sampleRate),
-      channels: AVAudioChannelCount(config.numChannels),
+      sampleRate: targetSampleRate,
+      channels: AVAudioChannelCount(targetChannels),
       interleaved: true
     )
 
-    guard let dstFormat = outputFormat else {
+    guard outputFormat != nil else {
       throw RecorderError.error(
         message: "Failed to start recording",
         details: "Format is not supported: \(config.sampleRate)Hz - \(config.numChannels) channels."
       )
     }
 
-    guard let converter = AVAudioConverter(from: srcFormat, to: dstFormat) else {
-      throw RecorderError.error(
-        message: "Failed to start recording",
-        details: "Format conversion is not possible."
-      )
-    }
-    converter.sampleRateConverterQuality = AVAudioQuality.high.rawValue
-
+    // Tap with the native source format - this is the only format guaranteed to work
+    let srcFormat = audioEngine.inputNode.inputFormat(forBus: 0)
 
     audioEngine.inputNode.installTap(
       onBus: bus,
       bufferSize: AVAudioFrameCount(config.streamBufferSize ?? 1024),
       format: srcFormat) { (buffer, _) -> Void in
 
       self.stream(
         buffer: buffer,
-        dstFormat: dstFormat,
-        converter: converter,
         recordEventHandler: recordEventHandler
       )
     }
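
Tapping the input node in its native format sidesteps the CoreAudio exception that `installTap` can raise when asked for a format the hardware does not provide, which appears to be the motivation for dropping the up-front `AVAudioConverter`. A minimal standalone sketch of the same pattern (function and setup names are illustrative, not part of this file):

```swift
import AVFoundation

// Sketch: query the hardware's native input format and tap with exactly that
// format; any rate/channel conversion happens later, inside the callback.
func installNativeTap(on engine: AVAudioEngine,
                      handler: @escaping (AVAudioPCMBuffer) -> Void) {
  let srcFormat = engine.inputNode.inputFormat(forBus: 0)
  engine.inputNode.installTap(onBus: 0, bufferSize: 1024, format: srcFormat) { buffer, _ in
    handler(buffer) // convert here, not in the tap format
  }
}
```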
Expand Down Expand Up @@ -140,51 +136,107 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
     }
   }
 
-  private func updateAmplitudeInt16(buffer: AVAudioPCMBuffer) {
-    guard let channelData = buffer.int16ChannelData else {
-      return
-    }
-
-    let frameCount = Int(buffer.frameLength)
-    let firstChannelPointer = channelData[0]
-    var maxSample: Float = -160.0
-
-    for i in 0..<frameCount {
-      let curSample = abs(Float(firstChannelPointer[i]))
-      if curSample > maxSample {
-        maxSample = curSample
-      }
-    }
-
-    amplitude = 20 * (log(maxSample / 32767.0) / log(10))
-  }
+  private func updateAmplitudeFloat(buffer: AVAudioPCMBuffer) {
+    guard let floatData = buffer.floatChannelData else {
+      return
+    }
+
+    let frameCount = Int(buffer.frameLength)
+    var maxSample: Float = 0.0
+
+    for i in 0..<frameCount {
+      let curSample = abs(floatData[0][i])
+      if curSample > maxSample {
+        maxSample = curSample
+      }
+    }
+
+    if maxSample > 0 {
+      amplitude = 20 * (log(maxSample) / log(10))
+    } else {
+      amplitude = -160.0
+    }
+  }

   private func stream(
     buffer: AVAudioPCMBuffer,
-    dstFormat: AVAudioFormat,
-    converter: AVAudioConverter,
     recordEventHandler: RecordStreamHandler
   ) -> Void {
-    guard let convertedBuffer = convertBuffer(buffer: buffer, dstFormat: dstFormat, converter: converter) else {
-      stop { path in }
-      return
-    }
-
-    updateAmplitudeInt16(buffer: convertedBuffer)
-
-    if config?.encoder == AudioEncoder.aacLc.rawValue {
-      guard let dataList = encodeAac(buffer: convertedBuffer) else {
-        stop { path in }
-        return
-      }
-
-      sendBytes(dataList: dataList, recordEventHandler: recordEventHandler)
-    } else if config?.encoder == AudioEncoder.pcm16bits.rawValue {
-      if let data = convertInt16toUInt8(buffer: convertedBuffer) {
-        sendBytes(dataList: [data], recordEventHandler: recordEventHandler)
-      }
-    }
-  }
+    let frameCount = Int(buffer.frameLength)
+    let channelCount = Int(buffer.format.channelCount)
+    print("[STREAM] frames=\(frameCount), channels=\(channelCount), rate=\(buffer.format.sampleRate)")
+
+    guard frameCount > 0, channelCount > 0 else {
+      print("[STREAM] Empty buffer, skipping")
+      return
+    }
+
+    updateAmplitudeFloat(buffer: buffer)
+
+    // Manual downmix + resample + convert
+    let data = manualConvert(buffer: buffer)
+    print("[STREAM] converted \(data.count) bytes")
+
+    if config?.encoder == AudioEncoder.pcm16bits.rawValue {
+      sendBytes(dataList: [data], recordEventHandler: recordEventHandler)
+    } else if config?.encoder == AudioEncoder.aacLc.rawValue {
+      sendBytes(dataList: [data], recordEventHandler: recordEventHandler)
+    }
+  }
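
Note that both encoder branches forward the same raw PCM bytes: `encodeAac(buffer:)` is never called on this path, so `aacLc` currently streams unencoded PCM. That may be intentional while the conversion is being debugged, but it is worth flagging.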

+  // Reduce to mono (loudest channel), resample via linear interpolation, convert float to int16 LE bytes
+  private func manualConvert(buffer: AVAudioPCMBuffer) -> Data {
+    guard let floatData = buffer.floatChannelData else {
+      print("[CONVERT] No float channel data!")
+      return Data()
+    }
+
+    let srcFrameCount = Int(buffer.frameLength)
+    let srcRate = buffer.format.sampleRate
+    let dstRate = targetSampleRate
+
+    // Find the channel with the most audio energy (not always ch0)
+    let channels = Int(buffer.format.channelCount)
+    var bestChannel = 0
+    var bestMax: Float = 0.0
+    for ch in 0..<channels {
+      var chMax: Float = 0.0
+      for i in 0..<srcFrameCount {
+        let v = abs(floatData[ch][i])
+        if v > chMax { chMax = v }
+      }
+      if chMax > bestMax {
+        bestMax = chMax
+        bestChannel = ch
+      }
+    }
+    let srcChannel = floatData[bestChannel]
+
+    // Calculate output frame count based on sample rate ratio
+    let dstFrameCount = Int(Double(srcFrameCount) * dstRate / srcRate)
+
+    var bytes = Data(capacity: dstFrameCount * targetChannels * 2)
+
+    for i in 0..<dstFrameCount {
+      // Map output sample index to fractional input index
+      let srcIndex = Double(i) * srcRate / dstRate
+      let idx0 = Int(srcIndex)
+      let frac = Float(srcIndex - Double(idx0))
+
+      let s0 = srcChannel[min(idx0, srcFrameCount - 1)]
+      let s1 = srcChannel[min(idx0 + 1, srcFrameCount - 1)]
+      let sample = s0 + frac * (s1 - s0)
+
+      let clamped = max(-1.0, min(1.0, sample))
+      let int16Val = Int16(clamped * 32767.0)
+
+      // Little-endian
+      bytes.append(UInt8(int16Val & 0x00FF))
+      bytes.append(UInt8((int16Val >> 8) & 0x00FF))
+    }
+
+    return bytes
+  }
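
The resampling in `manualConvert` is plain linear interpolation: output index `i` maps to the fractional source position `i * srcRate / dstRate`, and the two neighbouring samples are blended. Note also that the output is always mono regardless of `targetChannels`, which only sizes the preallocation. A self-contained sketch of the same mapping on bare arrays (illustrative, not part of the PR):

```swift
// Sketch: linear-interpolation resampler on plain Float arrays.
func resampleLinear(_ src: [Float], srcRate: Double, dstRate: Double) -> [Float] {
  guard !src.isEmpty, srcRate > 0, dstRate > 0 else { return [] }
  let dstCount = Int(Double(src.count) * dstRate / srcRate)
  return (0..<dstCount).map { i in
    let pos = Double(i) * srcRate / dstRate // fractional source index
    let idx0 = Int(pos)
    let frac = Float(pos - Double(idx0))
    let s0 = src[min(idx0, src.count - 1)]
    let s1 = src[min(idx0 + 1, src.count - 1)]
    return s0 + frac * (s1 - s0) // blend the two neighbours
  }
}

// resampleLinear([0, 1], srcRate: 2, dstRate: 4) == [0.0, 0.5, 1.0, 1.0]
```

Linear interpolation applies no low-pass filter, so unlike the removed `AVAudioConverter` path it can alias when downsampling (e.g. 48 kHz hardware to a 16 kHz target).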

   private func sendBytes(dataList: [Data], recordEventHandler: RecordStreamHandler) {
@@ -198,74 +250,72 @@ class RecorderStreamDelegate: NSObject, AudioRecordingStreamDelegate {
     }
   }
 
-  private func convertBuffer(
-    buffer: AVAudioPCMBuffer,
-    dstFormat: AVAudioFormat,
-    converter: AVAudioConverter) -> AVAudioPCMBuffer? {
-
-    let bufferToConvert: AVAudioPCMBuffer
-    let converterToUse: AVAudioConverter
-
-    if buffer.format.channelCount > 1 {
-      // Create a mono version of the buffer and a new converter for it
-      guard let monoFormat = AVAudioFormat(
-        commonFormat: .pcmFormatFloat32,
-        sampleRate: buffer.format.sampleRate,
-        channels: 1,
-        interleaved: false
-      ) else {
-        print("Unable to create mono format")
-        return nil
-      }
-
-      guard let monoBuffer = AVAudioPCMBuffer(pcmFormat: monoFormat, frameCapacity: buffer.frameLength) else {
-        print("Unable to create mono buffer")
-        return nil
-      }
-
-      monoBuffer.frameLength = buffer.frameLength
-
-      // Copy the first channel
-      if let src = buffer.floatChannelData, let dst = monoBuffer.floatChannelData {
-        memcpy(dst[0], src[0], Int(buffer.frameLength) * MemoryLayout<Float>.size)
-      }
-
-      guard let monoConverter = AVAudioConverter(from: monoFormat, to: dstFormat) else {
-        print("Unable to create mono converter")
-        return nil
-      }
-      monoConverter.sampleRateConverterQuality = AVAudioQuality.high.rawValue
-
-      bufferToConvert = monoBuffer
-      converterToUse = monoConverter
-    } else {
-      bufferToConvert = buffer
-      converterToUse = converter
-    }
-
-    let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
-      outStatus.pointee = .haveData
-      return bufferToConvert
-    }
-
-    // Determine frame capacity
-    let capacity = AVAudioFrameCount(Double(bufferToConvert.frameLength) * dstFormat.sampleRate / bufferToConvert.format.sampleRate)
-
-    // Destination buffer
-    guard let convertedBuffer = AVAudioPCMBuffer(pcmFormat: dstFormat, frameCapacity: capacity) else {
-      print("Unable to create output buffer")
-      return nil
-    }
-
-    // Convert input buffer (resample, num channels)
-    var error: NSError? = nil
-    converterToUse.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
-    if error != nil {
-      print("Unable to convert input buffer \(error!)")
-      return nil
-    }
-
-    return convertedBuffer
-  }
+  private func convertFloatToInt16Bytes(buffer: AVAudioPCMBuffer) -> Data {
+    guard let floatData = buffer.floatChannelData else {
+      return Data()
+    }
+
+    let frameCount = Int(buffer.frameLength)
+    let channels = Int(buffer.format.channelCount)
+
+    var bytes = Data(capacity: frameCount * channels * 2)
+    for i in 0..<frameCount {
+      for ch in 0..<channels {
+        let sample = floatData[ch][i]
+        let clamped = max(-1.0, min(1.0, sample))
+        let int16Val = Int16(clamped * 32767.0)
+        // Little-endian
+        bytes.append(UInt8(int16Val & 0x00FF))
+        bytes.append(UInt8((int16Val >> 8) & 0x00FF))
+      }
+    }
+
+    return bytes
+  }
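
`convertFloatToInt16Bytes` packs interleaved int16 samples low byte first. A worked example of the byte arithmetic (values illustrative):

```swift
// Sketch: Int16 to little-endian byte pair, as in convertFloatToInt16Bytes.
let sample: Int16 = 0x1234
let lo = UInt8(sample & 0x00FF)        // 0x34, appended first
let hi = UInt8((sample >> 8) & 0x00FF) // 0x12, appended second
// A float sample of 1.0 clamps to Int16 32767 = 0x7FFF, i.e. bytes FF 7F.
```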
+
+  private func convertFloatToInt16(
+    buffer: AVAudioPCMBuffer,
+    dstFormat: AVAudioFormat) -> AVAudioPCMBuffer? {
+
+    guard let floatData = buffer.floatChannelData else {
+      print("No float channel data in buffer")
+      return nil
+    }
+
+    // Use non-interleaved format for AVAudioPCMBuffer compatibility
+    guard let niFormat = AVAudioFormat(
+      commonFormat: .pcmFormatInt16,
+      sampleRate: dstFormat.sampleRate,
+      channels: dstFormat.channelCount,
+      interleaved: false
+    ) else {
+      print("Unable to create non-interleaved int16 format")
+      return nil
+    }
+
+    let frameCount = Int(buffer.frameLength)
+    let channels = Int(buffer.format.channelCount)
+
+    guard let int16Buffer = AVAudioPCMBuffer(pcmFormat: niFormat, frameCapacity: buffer.frameLength) else {
+      print("Unable to create int16 output buffer")
+      return nil
+    }
+    int16Buffer.frameLength = buffer.frameLength
+
+    guard let int16Data = int16Buffer.int16ChannelData else {
+      print("No int16 channel data in output buffer")
+      return nil
+    }
+
+    for ch in 0..<channels {
+      for i in 0..<frameCount {
+        let sample = floatData[ch][i]
+        let clamped = max(-1.0, min(1.0, sample))
+        int16Data[ch][i] = Int16(clamped * 32767.0)
+      }
+    }
+
+    return int16Buffer
+  }
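
In the visible hunks neither `convertFloatToInt16Bytes` nor `convertFloatToInt16` is referenced; `stream` packs bytes itself via `manualConvert`. The non-interleaved format matches how `int16ChannelData` is written here, one pointer per channel, and only `sampleRate` and `channelCount` are taken from `dstFormat`. A hypothetical call site (helper name illustrative):

```swift
// Sketch: wrap convertFloatToInt16 for a buffer-for-buffer type conversion.
// Sample rate and channel count pass through unchanged.
func toInt16(_ buffer: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
  guard let fmt = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                sampleRate: buffer.format.sampleRate,
                                channels: buffer.format.channelCount,
                                interleaved: false) else { return nil }
  return convertFloatToInt16(buffer: buffer, dstFormat: fmt)
}
```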

   private func encodeAac(buffer: AVAudioPCMBuffer) -> [Data]? {