From d33f8c478607975dbeb9654e2e6a2eba65f0b560 Mon Sep 17 00:00:00 2001 From: chmjkb Date: Fri, 3 Oct 2025 14:38:43 +0200 Subject: [PATCH 1/7] feat: remove stft calculation within the encoder --- .../common/rnexecutorch/models/BaseModel.cpp | 2 +- .../common/rnexecutorch/models/BaseModel.h | 2 +- .../models/speech_to_text/asr/ASR.cpp | 36 +++++++++---------- .../models/speech_to_text/asr/ASR.h | 9 +++-- 4 files changed, 23 insertions(+), 26 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp index a1194de69..72bb11e4d 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp @@ -56,7 +56,7 @@ std::vector BaseModel::getInputShape(std::string method_name, } std::vector> -BaseModel::getAllInputShapes(std::string methodName) { +BaseModel::getAllInputShapes(std::string methodName) const { if (!module_) { throw std::runtime_error("Model not loaded: Cannot get all input shapes"); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h index b7b7b54ed..27aed7351 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h @@ -27,7 +27,7 @@ class BaseModel { void unload() noexcept; std::vector getInputShape(std::string method_name, int32_t index); std::vector> - getAllInputShapes(std::string methodName = "forward"); + getAllInputShapes(std::string methodName = "forward") const; std::vector forwardJS(std::vector tensorViewVec); Result> forward(const EValue &input_value) const; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp index d0f965cb3..7c39e4020 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp @@ -4,8 +4,8 @@ #include "ASR.h" #include "executorch/extension/tensor/tensor_ptr.h" #include "rnexecutorch/data_processing/Numerical.h" -#include "rnexecutorch/data_processing/dsp.h" #include "rnexecutorch/data_processing/gzip.h" +#include namespace rnexecutorch::models::speech_to_text::asr { @@ -37,8 +37,7 @@ ASR::getInitialSequence(const DecodingOptions &options) const { return seq; } -GenerationResult ASR::generate(std::span waveform, - float temperature, +GenerationResult ASR::generate(std::span waveform, float temperature, const DecodingOptions &options) const { std::vector encoderOutput = this->encode(waveform); @@ -94,7 +93,7 @@ float ASR::getCompressionRatio(const std::string &text) const { } std::vector -ASR::generateWithFallback(std::span waveform, +ASR::generateWithFallback(std::span waveform, const DecodingOptions &options) const { std::vector temperatures = {0.0f, 0.2f, 0.4f, 0.6f, 0.8f, 1.0f}; std::vector bestTokens; @@ -209,7 +208,7 @@ ASR::estimateWordLevelTimestampsLinear(std::span tokens, return wordObjs; } -std::vector ASR::transcribe(std::span waveform, +std::vector ASR::transcribe(std::span waveform, const DecodingOptions &options) const { int32_t seek = 0; std::vector results; @@ -218,7 +217,7 @@ std::vector ASR::transcribe(std::span waveform, int32_t start = seek * ASR::kSamplingRate; const auto end = std::min( (seek + ASR::kChunkSize) * ASR::kSamplingRate, waveform.size()); - std::span chunk = waveform.subspan(start, end - start); + auto chunk = waveform.subspan(start, end - start); if (std::cmp_less(chunk.size(), ASR::kMinChunkSamples)) { break; @@ -246,19 +245,12 @@ std::vector ASR::transcribe(std::span waveform, return results; } -std::vector ASR::encode(std::span waveform) const { - constexpr int32_t fftWindowSize = 512; - constexpr int32_t stftHopLength = 160; - constexpr int32_t innerDim = 256; - - std::vector preprocessedData = - dsp::stftFromWaveform(waveform, fftWindowSize, stftHopLength); - const auto numFrames = - static_cast(preprocessedData.size()) / innerDim; - std::vector inputShape = {numFrames, innerDim}; +std::vector ASR::encode(std::span waveform) const { + auto inputShape = {static_cast(waveform.size())}; const auto modelInputTensor = executorch::extension::make_tensor_ptr( - std::move(inputShape), std::move(preprocessedData)); + std::move(inputShape), waveform.data(), + executorch::runtime::etensor::ScalarType::Float); const auto encoderResult = this->encoder->forward(modelInputTensor); if (!encoderResult.ok()) { @@ -268,7 +260,7 @@ std::vector ASR::encode(std::span waveform) const { } const auto decoderOutputTensor = encoderResult.get().at(0).toTensor(); - const int32_t outputNumel = decoderOutputTensor.numel(); + const auto outputNumel = decoderOutputTensor.numel(); const float *const dataPtr = decoderOutputTensor.const_data_ptr(); return {dataPtr, dataPtr + outputNumel}; @@ -277,12 +269,18 @@ std::vector ASR::encode(std::span waveform) const { std::vector ASR::decode(std::span tokens, std::span encoderOutput) const { std::vector tokenShape = {1, static_cast(tokens.size())}; + auto tokensLong = std::vector(tokens.begin(), tokens.end()); + auto tokenTensor = executorch::extension::make_tensor_ptr( - std::move(tokenShape), tokens.data(), ScalarType::Int); + tokenShape, tokensLong.data(), ScalarType::Long); const auto encoderOutputSize = static_cast(encoderOutput.size()); std::vector encShape = {1, ASR::kNumFrames, encoderOutputSize / ASR::kNumFrames}; + log(LOG_LEVEL::Debug, encShape); + log(LOG_LEVEL::Debug, tokenShape); + log(LOG_LEVEL::Debug, this->encoder->getAllInputShapes()); + log(LOG_LEVEL::Debug, this->decoder->getAllInputShapes()); auto encoderTensor = executorch::extension::make_tensor_ptr( std::move(encShape), encoderOutput.data(), ScalarType::Float); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h index 20180ebe4..a0ea7e181 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h @@ -14,9 +14,9 @@ class ASR { const models::BaseModel *decoder, const TokenizerModule *tokenizer); std::vector - transcribe(std::span waveform, + transcribe(std::span waveform, const types::DecodingOptions &options) const; - std::vector encode(std::span waveform) const; + std::vector encode(std::span waveform) const; std::vector decode(std::span tokens, std::span encoderOutput) const; @@ -44,11 +44,10 @@ class ASR { std::vector getInitialSequence(const types::DecodingOptions &options) const; - types::GenerationResult generate(std::span waveform, - float temperature, + types::GenerationResult generate(std::span waveform, float temperature, const types::DecodingOptions &options) const; std::vector - generateWithFallback(std::span waveform, + generateWithFallback(std::span waveform, const types::DecodingOptions &options) const; std::vector calculateWordLevelTimestamps(std::span tokens, From c9a086bfbfc2c052b56d64a189f6b34847e28904 Mon Sep 17 00:00:00 2001 From: chmjkb Date: Fri, 3 Oct 2025 14:39:55 +0200 Subject: [PATCH 2/7] chore: remove logs --- .../common/rnexecutorch/models/speech_to_text/asr/ASR.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp index 7c39e4020..bf8f9fb86 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp @@ -5,7 +5,6 @@ #include "executorch/extension/tensor/tensor_ptr.h" #include "rnexecutorch/data_processing/Numerical.h" #include "rnexecutorch/data_processing/gzip.h" -#include namespace rnexecutorch::models::speech_to_text::asr { @@ -277,10 +276,6 @@ std::vector ASR::decode(std::span tokens, const auto encoderOutputSize = static_cast(encoderOutput.size()); std::vector encShape = {1, ASR::kNumFrames, encoderOutputSize / ASR::kNumFrames}; - log(LOG_LEVEL::Debug, encShape); - log(LOG_LEVEL::Debug, tokenShape); - log(LOG_LEVEL::Debug, this->encoder->getAllInputShapes()); - log(LOG_LEVEL::Debug, this->decoder->getAllInputShapes()); auto encoderTensor = executorch::extension::make_tensor_ptr( std::move(encShape), encoderOutput.data(), ScalarType::Float); From 54b56518d7e346b1252c0a427eb6affc8c2ce205 Mon Sep 17 00:00:00 2001 From: chmjkb Date: Fri, 3 Oct 2025 14:44:57 +0200 Subject: [PATCH 3/7] fix: mark some methods const in the BaseModel --- .../common/rnexecutorch/models/BaseModel.cpp | 10 +++++----- .../common/rnexecutorch/models/BaseModel.h | 15 +++++++++------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp index 72bb11e4d..ee53c7d5a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp @@ -30,7 +30,7 @@ BaseModel::BaseModel(const std::string &modelSource, } std::vector BaseModel::getInputShape(std::string method_name, - int32_t index) { + int32_t index) const { if (!module_) { throw std::runtime_error("Model not loaded: Cannot get input shape"); } @@ -88,7 +88,7 @@ BaseModel::getAllInputShapes(std::string methodName) const { /// to JS. It is not meant to be used within C++. If you want to call forward /// from C++ on a BaseModel, please use BaseModel::forward. std::vector -BaseModel::forwardJS(std::vector tensorViewVec) { +BaseModel::forwardJS(std::vector tensorViewVec) const { if (!module_) { throw std::runtime_error("Model not loaded: Cannot perform forward pass"); } @@ -136,7 +136,7 @@ BaseModel::forwardJS(std::vector tensorViewVec) { } Result -BaseModel::getMethodMeta(const std::string &methodName) { +BaseModel::getMethodMeta(const std::string &methodName) const { if (!module_) { throw std::runtime_error("Model not loaded: Cannot get method meta!"); } @@ -161,7 +161,7 @@ BaseModel::forward(const std::vector &input_evalues) const { Result> BaseModel::execute(const std::string &methodName, - const std::vector &input_value) { + const std::vector &input_value) const { if (!module_) { throw std::runtime_error("Model not loaded, cannot run execute."); } @@ -175,7 +175,7 @@ std::size_t BaseModel::getMemoryLowerBound() const noexcept { void BaseModel::unload() noexcept { module_.reset(nullptr); } std::vector -BaseModel::getTensorShape(const executorch::aten::Tensor &tensor) { +BaseModel::getTensorShape(const executorch::aten::Tensor &tensor) const { auto sizes = tensor.sizes(); return std::vector(sizes.begin(), sizes.end()); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h index 27aed7351..cf2940429 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h @@ -25,18 +25,20 @@ class BaseModel { Module::LoadMode loadMode = Module::LoadMode::MmapUseMlockIgnoreErrors); std::size_t getMemoryLowerBound() const noexcept; void unload() noexcept; - std::vector getInputShape(std::string method_name, int32_t index); + std::vector getInputShape(std::string method_name, + int32_t index) const; std::vector> getAllInputShapes(std::string methodName = "forward") const; std::vector - forwardJS(std::vector tensorViewVec); + forwardJS(std::vector tensorViewVec) const; Result> forward(const EValue &input_value) const; Result> forward(const std::vector &input_value) const; - Result> execute(const std::string &methodName, - const std::vector &input_value); + Result> + execute(const std::string &methodName, + const std::vector &input_value) const; Result - getMethodMeta(const std::string &methodName); + getMethodMeta(const std::string &methodName) const; protected: // If possible, models should not use the JS runtime to keep JSI internals @@ -49,7 +51,8 @@ class BaseModel { std::size_t memorySizeLowerBound{0}; private: - std::vector getTensorShape(const executorch::aten::Tensor &tensor); + std::vector + getTensorShape(const executorch::aten::Tensor &tensor) const; }; } // namespace models From f615939abae22b5e399f8d88dab3b39b79d6b6a0 Mon Sep 17 00:00:00 2001 From: chmjkb Date: Mon, 1 Dec 2025 10:36:49 +0100 Subject: [PATCH 4/7] chore: bring back DSP, remove stftFromWaveform --- .../rnexecutorch/data_processing/dsp.cpp | 46 ------------------- .../VoiceActivityDetection.cpp | 3 +- 2 files changed, 1 insertion(+), 48 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/dsp.cpp b/packages/react-native-executorch/common/rnexecutorch/data_processing/dsp.cpp index d3761dced..b1c8714a2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/data_processing/dsp.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/data_processing/dsp.cpp @@ -1,6 +1,4 @@ -#include #include -#include #include #include #include @@ -18,48 +16,4 @@ std::vector hannWindow(size_t size) { return window; } -std::vector stftFromWaveform(std::span waveform, - size_t fftWindowSize, size_t hopSize) { - // Initialize FFT - FFT fft(fftWindowSize); - - const auto numFrames = 1 + (waveform.size() - fftWindowSize) / hopSize; - const auto numBins = fftWindowSize / 2; - const auto hann = hannWindow(fftWindowSize); - auto inBuffer = std::vector(fftWindowSize); - auto outBuffer = std::vector>(fftWindowSize); - - // Output magnitudes in dB - std::vector magnitudes; - magnitudes.reserve(numFrames * numBins); - const auto magnitudeScale = 1.0f / static_cast(fftWindowSize); - constexpr auto epsilon = std::numeric_limits::epsilon(); - constexpr auto dbConversionFactor = 20.0f; - - for (size_t t = 0; t < numFrames; ++t) { - const size_t offset = t * hopSize; - // Clear the input buffer first - std::ranges::fill(inBuffer, 0.0f); - - // Fill frame with windowed signal - const size_t samplesToRead = - std::min(fftWindowSize, waveform.size() - offset); - for (size_t i = 0; i < samplesToRead; i++) { - inBuffer[i] = waveform[offset + i] * hann[i]; - } - - fft.doFFT(inBuffer.data(), outBuffer); - - // Calculate magnitudes in dB (only positive frequencies) - for (size_t i = 0; i < numBins; i++) { - const auto magnitude = std::abs(outBuffer[i]) * magnitudeScale; - const auto magnitude_db = - dbConversionFactor * log10f(magnitude + epsilon); - magnitudes.push_back(magnitude_db); - } - } - - return magnitudes; -} - } // namespace rnexecutorch::dsp diff --git a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp index d07dbfb3c..dbc974706 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include namespace rnexecutorch::models::voice_activity_detection { @@ -158,4 +157,4 @@ VoiceActivityDetection::postprocess(const std::vector &scores, return speechSegments; } -} // namespace rnexecutorch::models::voice_activity_detection \ No newline at end of file +} // namespace rnexecutorch::models::voice_activity_detection From cee83da03569dbf5df913b049b0bcad874c103b6 Mon Sep 17 00:00:00 2001 From: chmjkb Date: Tue, 2 Dec 2025 14:12:53 +0100 Subject: [PATCH 5/7] feat: add quantized whisper to model urls --- .../react-native-executorch/src/constants/modelUrls.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 57381cf15..ba5b0cde2 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -311,6 +311,9 @@ const WHISPER_TINY_EN_TOKENIZER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/ const WHISPER_TINY_EN_ENCODER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_encoder_xnnpack.pte`; const WHISPER_TINY_EN_DECODER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_decoder_xnnpack.pte`; +const WHISPER_TINY_EN_ENCODER_QUANTIZED = `${URL_PREFIX}-whisper-tiny-quantized.en/${NEXT_VERSION_TAG}/xnnpack/whisper_tiny_quantized_en_encoder_xnnpack.pte`; +const WHISPER_TINY_EN_DECODER_QUANTIZED = `${URL_PREFIX}-whisper-tiny-quantized.en/${NEXT_VERSION_TAG}/xnnpack/whisper_tiny_quantized_en_decoder_xnnpack.pte`; + const WHISPER_BASE_EN_TOKENIZER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/tokenizer.json`; const WHISPER_BASE_EN_ENCODER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/xnnpack/whisper_base_en_encoder_xnnpack.pte`; const WHISPER_BASE_EN_DECODER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/xnnpack/whisper_base_en_decoder_xnnpack.pte`; @@ -338,6 +341,13 @@ export const WHISPER_TINY_EN = { tokenizerSource: WHISPER_TINY_EN_TOKENIZER, }; +export const WHISPER_TINY_EN_QUANTIZED = { + isMultilingual: false, + encoderSource: WHISPER_TINY_EN_ENCODER_QUANTIZED, + decoderSource: WHISPER_TINY_EN_DECODER_QUANTIZED, + tokenizerSource: WHISPER_TINY_EN_TOKENIZER, +}; + export const WHISPER_BASE_EN = { isMultilingual: false, encoderSource: WHISPER_BASE_EN_ENCODER, From d07dc16ce642ebfe4514b18b6c4125b1556c0b99 Mon Sep 17 00:00:00 2001 From: chmjkb Date: Tue, 2 Dec 2025 14:24:09 +0100 Subject: [PATCH 6/7] chore: update all the whisper versions to use new version tag --- .../src/constants/modelUrls.ts | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index ba5b0cde2..124f619ed 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -2,7 +2,7 @@ import { Platform } from 'react-native'; const URL_PREFIX = 'https://huggingface.co/software-mansion/react-native-executorch'; -const VERSION_TAG = 'resolve/v0.5.0'; +const VERSION_TAG = 'resolve/v0.6.0'; const NEXT_VERSION_TAG = 'resolve/v0.6.0'; // LLMs @@ -307,32 +307,32 @@ export const STYLE_TRANSFER_UDNIE = { }; // S2T -const WHISPER_TINY_EN_TOKENIZER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/tokenizer.json`; -const WHISPER_TINY_EN_ENCODER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_encoder_xnnpack.pte`; -const WHISPER_TINY_EN_DECODER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_decoder_xnnpack.pte`; +const WHISPER_TINY_EN_TOKENIZER = `${URL_PREFIX}-whisper-tiny.en/${NEXT_VERSION_TAG}/tokenizer.json`; +const WHISPER_TINY_EN_ENCODER = `${URL_PREFIX}-whisper-tiny.en/${NEXT_VERSION_TAG}/xnnpack/whisper_tiny_en_encoder_xnnpack.pte`; +const WHISPER_TINY_EN_DECODER = `${URL_PREFIX}-whisper-tiny.en/${NEXT_VERSION_TAG}/xnnpack/whisper_tiny_en_decoder_xnnpack.pte`; const WHISPER_TINY_EN_ENCODER_QUANTIZED = `${URL_PREFIX}-whisper-tiny-quantized.en/${NEXT_VERSION_TAG}/xnnpack/whisper_tiny_quantized_en_encoder_xnnpack.pte`; const WHISPER_TINY_EN_DECODER_QUANTIZED = `${URL_PREFIX}-whisper-tiny-quantized.en/${NEXT_VERSION_TAG}/xnnpack/whisper_tiny_quantized_en_decoder_xnnpack.pte`; -const WHISPER_BASE_EN_TOKENIZER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/tokenizer.json`; -const WHISPER_BASE_EN_ENCODER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/xnnpack/whisper_base_en_encoder_xnnpack.pte`; -const WHISPER_BASE_EN_DECODER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/xnnpack/whisper_base_en_decoder_xnnpack.pte`; +const WHISPER_BASE_EN_TOKENIZER = `${URL_PREFIX}-whisper-base.en/${NEXT_VERSION_TAG}/tokenizer.json`; +const WHISPER_BASE_EN_ENCODER = `${URL_PREFIX}-whisper-base.en/${NEXT_VERSION_TAG}/xnnpack/whisper_base_en_encoder_xnnpack.pte`; +const WHISPER_BASE_EN_DECODER = `${URL_PREFIX}-whisper-base.en/${NEXT_VERSION_TAG}/xnnpack/whisper_base_en_decoder_xnnpack.pte`; -const WHISPER_SMALL_EN_TOKENIZER = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/tokenizer.json`; -const WHISPER_SMALL_EN_ENCODER = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/xnnpack/whisper_small_en_encoder_xnnpack.pte`; -const WHISPER_SMALL_EN_DECODER = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/xnnpack/whisper_small_en_decoder_xnnpack.pte`; +const WHISPER_SMALL_EN_TOKENIZER = `${URL_PREFIX}-whisper-small.en/${NEXT_VERSION_TAG}/tokenizer.json`; +const WHISPER_SMALL_EN_ENCODER = `${URL_PREFIX}-whisper-small.en/${NEXT_VERSION_TAG}/xnnpack/whisper_small_en_encoder_xnnpack.pte`; +const WHISPER_SMALL_EN_DECODER = `${URL_PREFIX}-whisper-small.en/${NEXT_VERSION_TAG}/xnnpack/whisper_small_en_decoder_xnnpack.pte`; -const WHISPER_TINY_TOKENIZER = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/tokenizer.json`; -const WHISPER_TINY_ENCODER_MODEL = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/xnnpack/whisper_tiny_encoder_xnnpack.pte`; -const WHISPER_TINY_DECODER_MODEL = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/xnnpack/whisper_tiny_decoder_xnnpack.pte`; +const WHISPER_TINY_TOKENIZER = `${URL_PREFIX}-whisper-tiny/${NEXT_VERSION_TAG}/tokenizer.json`; +const WHISPER_TINY_ENCODER_MODEL = `${URL_PREFIX}-whisper-tiny/${NEXT_VERSION_TAG}/xnnpack/whisper_tiny_encoder_xnnpack.pte`; +const WHISPER_TINY_DECODER_MODEL = `${URL_PREFIX}-whisper-tiny/${NEXT_VERSION_TAG}/xnnpack/whisper_tiny_decoder_xnnpack.pte`; -const WHISPER_BASE_TOKENIZER = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/tokenizer.json`; -const WHISPER_BASE_ENCODER_MODEL = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/xnnpack/whisper_base_encoder_xnnpack.pte`; -const WHISPER_BASE_DECODER_MODEL = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/xnnpack/whisper_base_decoder_xnnpack.pte`; +const WHISPER_BASE_TOKENIZER = `${URL_PREFIX}-whisper-base/${NEXT_VERSION_TAG}/tokenizer.json`; +const WHISPER_BASE_ENCODER_MODEL = `${URL_PREFIX}-whisper-base/${NEXT_VERSION_TAG}/xnnpack/whisper_base_encoder_xnnpack.pte`; +const WHISPER_BASE_DECODER_MODEL = `${URL_PREFIX}-whisper-base/${NEXT_VERSION_TAG}/xnnpack/whisper_base_decoder_xnnpack.pte`; -const WHISPER_SMALL_TOKENIZER = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/tokenizer.json`; -const WHISPER_SMALL_ENCODER_MODEL = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/xnnpack/whisper_small_encoder_xnnpack.pte`; -const WHISPER_SMALL_DECODER_MODEL = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/xnnpack/whisper_small_decoder_xnnpack.pte`; +const WHISPER_SMALL_TOKENIZER = `${URL_PREFIX}-whisper-small/${NEXT_VERSION_TAG}/tokenizer.json`; +const WHISPER_SMALL_ENCODER_MODEL = `${URL_PREFIX}-whisper-small/${NEXT_VERSION_TAG}/xnnpack/whisper_small_encoder_xnnpack.pte`; +const WHISPER_SMALL_DECODER_MODEL = `${URL_PREFIX}-whisper-small/${NEXT_VERSION_TAG}/xnnpack/whisper_small_decoder_xnnpack.pte`; export const WHISPER_TINY_EN = { isMultilingual: false, From d1fb3b3cadf047b7802df6afd02095e87a77e06a Mon Sep 17 00:00:00 2001 From: Jakub Chmura <92989966+chmjkb@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:19:17 +0100 Subject: [PATCH 7/7] Update packages/react-native-executorch/src/constants/modelUrls.ts Co-authored-by: IgorSwat <114943112+IgorSwat@users.noreply.github.com> --- packages/react-native-executorch/src/constants/modelUrls.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 124f619ed..e9fe9e4d9 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -2,7 +2,7 @@ import { Platform } from 'react-native'; const URL_PREFIX = 'https://huggingface.co/software-mansion/react-native-executorch'; -const VERSION_TAG = 'resolve/v0.6.0'; +const VERSION_TAG = 'resolve/v0.5.0'; const NEXT_VERSION_TAG = 'resolve/v0.6.0'; // LLMs