From 0231626db00e31a95c2dc7848595b440fa34cf4d Mon Sep 17 00:00:00 2001
From: chmjkb
Date: Mon, 8 Dec 2025 13:05:07 +0100
Subject: [PATCH 1/2] docs: add v0.6.0 release, fix mobile navigation issues

---
 .../src/components/Hero/StartScreen/index.tsx |    2 +-
 .../fundamentals/getting-started.md           |    1 +
 .../01-fundamentals/_category_.json           |    5 +-
 .../01-fundamentals/01-getting-started.md     |   99 +
 .../01-fundamentals/02-loading-models.md      |   50 +
 .../03-frequently-asked-questions.md          |   39 +
 .../01-fundamentals/_category_.json           |    6 +
 .../_category_.json                           |    6 +
 .../01-natural-language-processing/useLLM.md  |  537 +++
 .../useSpeechToText.md                        |  343 ++
 .../useTextEmbeddings.md                      |  158 +
 .../useTokenizer.md                           |  104 +
 .../01-natural-language-processing/useVAD.md  |  194 +
 .../02-computer-vision/_category_.json        |    6 +
 .../02-computer-vision/useClassification.md   |  113 +
 .../02-computer-vision/useImageEmbeddings.md  |  132 +
 .../useImageSegmentation.md                   |  117 +
 .../02-hooks/02-computer-vision/useOCR.md     |  332 ++
 .../02-computer-vision/useObjectDetection.md  |  152 +
 .../02-computer-vision/useStyleTransfer.md    |  114 +
 .../02-computer-vision/useTextToImage.md      |  133 +
 .../02-computer-vision/useVerticalOCR.md      |  347 ++
 .../03-executorch-bindings/_category_.json    |    6 +
 .../useExecutorchModule.md                    |  155 +
 .../version-0.6.0/02-hooks/_category_.json    |    6 +
 .../LLMModule.md                              |  172 +
 .../SpeechToTextModule.md                     |  252 ++
 .../TextEmbeddingsModule.md                   |   59 +
 .../TokenizerModule.md                        |   60 +
 .../VADModule.md                              |   64 +
 .../_category_.json                           |    6 +
 .../ClassificationModule.md                   |   64 +
 .../ImageEmbeddingsModule.md                  |   60 +
 .../ImageSegmentationModule.md                |   77 +
 .../02-computer-vision/OCRModule.md           |  135 +
 .../ObjectDetectionModule.md                  |   77 +
 .../02-computer-vision/StyleTransferModule.md |   64 +
 .../02-computer-vision/TextToImageModule.md   |   82 +
 .../02-computer-vision/VerticalOCRModule.md   |  151 +
 .../02-computer-vision/_category_.json        |    6 +
 .../ExecutorchModule.md                       |  164 +
 .../03-executorch-bindings/_category_.json    |    6 +
 .../03-typescript-api/_category_.json         |    6 +
 .../04-benchmarks/_category_.json             |    6 +
 .../04-benchmarks/inference-time.md           |  111 +
 .../04-benchmarks/memory-usage.md             |   81 +
 .../version-0.6.0/04-benchmarks/model-size.md |   96 +
 .../05-utilities/_category_.json              |    6 +
 .../05-utilities/resource-fetcher.md          |  218 +
 .../version-0.6.0-sidebars.json               |    8 +
 docs/versions.json                            |    2 +-
 docs/yarn.lock                                | 3751 ++++++++---------
 52 files changed, 6923 insertions(+), 2018 deletions(-)
 create mode 100644 docs/versioned_docs/version-0.6.0/01-fundamentals/01-getting-started.md
 create mode 100644 docs/versioned_docs/version-0.6.0/01-fundamentals/02-loading-models.md
 create mode 100644 docs/versioned_docs/version-0.6.0/01-fundamentals/03-frequently-asked-questions.md
 create mode 100644 docs/versioned_docs/version-0.6.0/01-fundamentals/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/useLLM.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/useSpeechToText.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/useTextEmbeddings.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/useTokenizer.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/useVAD.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/02-computer-vision/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/02-computer-vision/useClassification.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/02-computer-vision/useImageEmbeddings.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/02-computer-vision/useImageSegmentation.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/02-computer-vision/useOCR.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/02-computer-vision/useObjectDetection.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/02-computer-vision/useStyleTransfer.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/02-computer-vision/useTextToImage.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/02-computer-vision/useVerticalOCR.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/03-executorch-bindings/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/03-executorch-bindings/useExecutorchModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/02-hooks/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/01-natural-language-processing/LLMModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/01-natural-language-processing/SpeechToTextModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/01-natural-language-processing/TokenizerModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/01-natural-language-processing/VADModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/01-natural-language-processing/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/02-computer-vision/ClassificationModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/02-computer-vision/ImageEmbeddingsModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/02-computer-vision/ImageSegmentationModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/02-computer-vision/OCRModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/02-computer-vision/ObjectDetectionModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/02-computer-vision/StyleTransferModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/02-computer-vision/TextToImageModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/02-computer-vision/VerticalOCRModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/02-computer-vision/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/03-executorch-bindings/ExecutorchModule.md
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/03-executorch-bindings/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/03-typescript-api/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/04-benchmarks/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/04-benchmarks/inference-time.md
 create mode 100644 docs/versioned_docs/version-0.6.0/04-benchmarks/memory-usage.md
 create mode 100644 docs/versioned_docs/version-0.6.0/04-benchmarks/model-size.md
 create mode 100644 docs/versioned_docs/version-0.6.0/05-utilities/_category_.json
 create mode 100644 docs/versioned_docs/version-0.6.0/05-utilities/resource-fetcher.md
 create mode 100644 docs/versioned_sidebars/version-0.6.0-sidebars.json

diff --git a/docs/src/components/Hero/StartScreen/index.tsx b/docs/src/components/Hero/StartScreen/index.tsx
index 34ae18fd8..e22097ffe 100644
--- a/docs/src/components/Hero/StartScreen/index.tsx
+++ b/docs/src/components/Hero/StartScreen/index.tsx
@@ -24,7 +24,7 @@ const StartScreen = () => {
diff --git a/docs/versioned_docs/version-0.4.x/fundamentals/getting-started.md b/docs/versioned_docs/version-0.4.x/fundamentals/getting-started.md
index 46da3f6f5..572b019f2 100644
--- a/docs/versioned_docs/version-0.4.x/fundamentals/getting-started.md
+++ b/docs/versioned_docs/version-0.4.x/fundamentals/getting-started.md
@@ -1,5 +1,6 @@
 ---
 title: Getting Started
+slug: /
 keywords:
   [
     react native,
diff --git a/docs/versioned_docs/version-0.5.x/01-fundamentals/_category_.json b/docs/versioned_docs/version-0.5.x/01-fundamentals/_category_.json
index e3fddcbeb..3d3a48f26 100644
--- a/docs/versioned_docs/version-0.5.x/01-fundamentals/_category_.json
+++ b/docs/versioned_docs/version-0.5.x/01-fundamentals/_category_.json
@@ -1,6 +1,3 @@
 {
-  "label": "Fundamentals",
-  "link": {
-    "type": "generated-index"
-  }
+  "label": "Fundamentals"
 }
diff --git a/docs/versioned_docs/version-0.6.0/01-fundamentals/01-getting-started.md b/docs/versioned_docs/version-0.6.0/01-fundamentals/01-getting-started.md
new file mode 100644
index 000000000..967630747
--- /dev/null
+++ b/docs/versioned_docs/version-0.6.0/01-fundamentals/01-getting-started.md
@@ -0,0 +1,99 @@
+---
+title: Getting Started
+keywords:
+  [
+    react native,
+    react native ai,
+    react native llm,
+    react native qwen,
+    react native llama,
+    react native executorch,
+    executorch,
+    on-device ai,
+    pytorch,
+    mobile ai,
+  ]
+description: 'Get started with React Native ExecuTorch - a framework for running AI models on-device in your React Native applications.'
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## What is ExecuTorch?
+
+ExecuTorch is a novel AI framework developed by Meta, designed to streamline deploying PyTorch models on a variety of devices, including mobile phones and microcontrollers. The framework exports models into standalone binaries that run locally, without requiring API calls. ExecuTorch achieves state-of-the-art performance through optimizations and delegates such as Core ML and XNNPACK, and it provides a seamless export process with robust debugging options, making issues easier to resolve when they arise.
+
+## React Native ExecuTorch
+
+React Native ExecuTorch is our way of bringing ExecuTorch into the React Native world. Our API is built to be simple, declarative, and efficient. We also provide a set of pre-exported models for common use cases, so you won't have to worry about handling exports yourself. With just a few lines of JavaScript, you can run AI models (even LLMs 👀) right on your device, keeping user data private and saving on cloud costs.
+
+## Compatibility
+
+React Native ExecuTorch supports only the [New React Native architecture](https://reactnative.dev/architecture/landing-page).
+
+If your app still runs on the old architecture, please consider upgrading to the New Architecture.
+
+## Installation
+
+Installation is straightforward: just use your favorite package manager.
+
+<Tabs>
+  <TabItem value="npm" label="npm">
+
+```bash
+npm install react-native-executorch
+```
+
+  </TabItem>
+  <TabItem value="pnpm" label="pnpm">
+
+```bash
+pnpm install react-native-executorch
+```
+
+  </TabItem>
+  <TabItem value="yarn" label="yarn">
+
+```bash
+yarn add react-native-executorch
+```
+
+  </TabItem>
+</Tabs>
+
+If you're using bare React Native (instead of a managed Expo project), you also need to install **Expo Modules**, because the underlying implementation relies on `expo-file-system`, an Expo package. The [Expo Modules installation guide](https://docs.expo.dev/bare/installing-expo-modules/) covers setting this up in a bare React Native environment.
+
+If you plan on using your models via `require()` instead of fetching them from a URL, you also need to add the following lines to your `metro.config.js`:
+
+```js
+// metro.config.js
+...
+  defaultConfig.resolver.assetExts.push('pte')
+  defaultConfig.resolver.assetExts.push('bin')
+...
+```
+
+This allows us to use binaries, such as exported models or tokenizers for LLMs.
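+For reference, a complete `metro.config.js` might look like the sketch below. This assumes an Expo-style config built with `getDefaultConfig` from `expo/metro-config`; adapt it to however your project already constructs its Metro config:
+
+```js
+// metro.config.js - minimal sketch, adjust to your existing setup
+const { getDefaultConfig } = require('expo/metro-config');
+
+const defaultConfig = getDefaultConfig(__dirname);
+
+// Let Metro treat ExecuTorch model binaries (.pte) and tokenizer
+// binaries (.bin) as bundleable assets.
+defaultConfig.resolver.assetExts.push('pte');
+defaultConfig.resolver.assetExts.push('bin');
+
+module.exports = defaultConfig;
+```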
+:::caution
+When using Expo, please note that you need to use a custom development build of your app, not the standard Expo Go app. This is because we rely on native modules, which Expo Go doesn't support.
+:::
+
+:::info
+Because we are using ExecuTorch under the hood, you won't be able to build an iOS app for release with a simulator selected as the target device. Make sure to test release builds on real devices.
+:::
+
+Running the app with the library:
+
+```bash
+yarn run expo:<ios | android> -d
+```
+
+## Good reads
+
+If you want to dive deeper into ExecuTorch or our previous work with the framework, we highly encourage you to check out the following resources:
+
+- [ExecuTorch docs](https://pytorch.org/executorch/stable/index.html)
+- [Native code for iOS](https://medium.com/swmansion/bringing-native-ai-to-your-mobile-apps-with-executorch-part-i-ios-f1562a4556e8?source=user_profile_page---------0-------------250189c98ccf---------------)
+- [Native code for Android](https://medium.com/swmansion/bringing-native-ai-to-your-mobile-apps-with-executorch-part-ii-android-29431b6b9f7f?source=user_profile_page---------2-------------b8e3a5cb1c63---------------)
+- [Exporting to Android with XNNPACK](https://medium.com/swmansion/exporting-ai-models-on-android-with-xnnpack-and-executorch-3e70cff51c59?source=user_profile_page---------1-------------b8e3a5cb1c63---------------)
diff --git a/docs/versioned_docs/version-0.6.0/01-fundamentals/02-loading-models.md b/docs/versioned_docs/version-0.6.0/01-fundamentals/02-loading-models.md
new file mode 100644
index 000000000..8763d9614
--- /dev/null
+++ b/docs/versioned_docs/version-0.6.0/01-fundamentals/02-loading-models.md
@@ -0,0 +1,50 @@
+---
+title: Loading Models
+---
+
+There are three methods available for loading model files, depending on their size and location.
+
+**1. Load from the React Native assets folder (for files < 512MB)**
+
+```typescript
+useExecutorchModule({
+  modelSource: require('../assets/llama3_2.pte'),
+});
+```
+
+**2. Load from a remote URL:**
+
+For files larger than 512MB, or when you want to keep the size of the app smaller, you can load the model from a remote URL (e.g. HuggingFace).
+
+```typescript
+useExecutorchModule({
+  modelSource: 'https://.../llama3_2.pte',
+});
+```
+
+**3. Load from the local file system:**
+
+If you prefer to delegate the process of obtaining and loading the model and tokenizer files to the user, you can use the following method:
+
+```typescript
+useExecutorchModule({
+  modelSource: 'file:///var/mobile/.../llama3_2.pte',
+});
+```
+
+:::info
+The downloaded files are stored in the documents directory of your application.
+:::
+
+## Example
+
+The following code snippet demonstrates how to load model and tokenizer files using the `useLLM` hook:
+
+```typescript
+import { useLLM } from 'react-native-executorch';
+
+const llama = useLLM({
+  model: {
+    modelSource: 'https://.../llama3_2.pte',
+    tokenizerSource: require('../assets/tokenizer.bin'),
+  },
+});
+```
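+While a remote model is being fetched, you can surface the download state in your UI. Here is a minimal sketch of what that could look like (the component and its rendering are illustrative, not prescribed by the library):
+
+```tsx
+import React from 'react';
+import { Text } from 'react-native';
+import { useLLM, LLAMA3_2_1B } from 'react-native-executorch';
+
+export function ModelLoadingBanner() {
+  const llm = useLLM({ model: LLAMA3_2_1B });
+
+  // `downloadProgress` goes from 0 to 1 while the model files are fetched.
+  if (llm.error) {
+    return <Text>Failed to load model: {llm.error}</Text>;
+  }
+  if (!llm.isReady) {
+    return <Text>Downloading... {Math.round(llm.downloadProgress * 100)}%</Text>;
+  }
+  return <Text>Model ready</Text>;
+}
+```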
diff --git a/docs/versioned_docs/version-0.6.0/01-fundamentals/03-frequently-asked-questions.md b/docs/versioned_docs/version-0.6.0/01-fundamentals/03-frequently-asked-questions.md
new file mode 100644
index 000000000..03914b25d
--- /dev/null
+++ b/docs/versioned_docs/version-0.6.0/01-fundamentals/03-frequently-asked-questions.md
@@ -0,0 +1,39 @@
+---
+title: Frequently Asked Questions
+---
+
+This section answers some common community questions, especially regarding the ExecuTorch runtime or adding your own models. If you don't see an answer to your question, feel free to open a [discussion](https://github.com/software-mansion/react-native-executorch/discussions/new/choose).
+
+### What models are supported?
+
+Each hook documentation subpage (useClassification, useLLM, etc.) contains a supported models section, which lists the models that are runnable within the library with close to no setup. For running your custom models, refer to `ExecutorchModule` or `useExecutorchModule`.
+
+### How can I run my own AI model?
+
+To run your own model, you need to directly access the underlying [ExecuTorch Module API](https://pytorch.org/executorch/stable/extension-module.html). We provide an experimental [React hook](../02-hooks/03-executorch-bindings/useExecutorchModule.md) along with a [TypeScript alternative](../03-typescript-api/03-executorch-bindings/ExecutorchModule.md), which let you use that API without diving into native code. To get a model into a format the runtime can execute, you'll need to get your hands dirty with some ExecuTorch knowledge; for guides on exporting models, please refer to the [ExecuTorch tutorials](https://pytorch.org/executorch/stable/tutorials/export-to-executorch-tutorial.html). Once you obtain your model in the `.pte` format, you can run it with `useExecutorchModule` or `ExecutorchModule`.
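+For illustration, here is a minimal sketch of running a custom model this way. The model file, input data, and shape are placeholders, and the exact `forward` signature is documented on the `useExecutorchModule` page:
+
+```typescript
+import { useExecutorchModule } from 'react-native-executorch';
+
+// Load a custom model exported to the .pte format.
+const executorchModule = useExecutorchModule({
+  modelSource: require('../assets/my_model.pte'),
+});
+
+// Once the module is ready, run inference with your own input data.
+const runInference = async () => {
+  const input = new Float32Array([0.1, 0.2, 0.3, 0.4]);
+  const shape = [1, 4]; // placeholder shape matching the model's expected input
+  const output = await executorchModule.forward(input, shape);
+  console.log(output);
+};
+```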
+### Can you do function calling with useLLM?
+
+If your model supports tool calling (i.e. its chat template can process tools), you can use the method explained on the [useLLM page](../02-hooks/01-natural-language-processing/useLLM.md).
+
+If your model doesn't support it, you can still work around it using context. For details, refer to [this comment](https://github.com/software-mansion/react-native-executorch/issues/173#issuecomment-2775082278).
+
+### Can I use React Native ExecuTorch in bare React Native apps?
+
+Yes, but you need to install Expo Modules first. For a setup guide, refer to [this tutorial](https://docs.expo.dev/bare/installing-expo-modules/). This is because we use Expo File System under the hood to download and manage the model binaries.
+
+### Do you support the old architecture?
+
+The old architecture is not supported, and we're currently not planning to add support for it.
+
+### Can I run GGUF models using the library?
+
+No. As of now, the ExecuTorch runtime doesn't provide a reliable way to use GGUF models, so it is not possible.
+
+### Are the models leveraging GPU acceleration?
+
+While it is possible to run some models on iOS using Core ML, a backend that utilizes the CPU, GPU, and ANE, we currently don't have many models exported to Core ML. For Android, the current state of GPU acceleration is fairly limited. As of now, there are attempts at running models using a Vulkan backend; however, its operator support is very limited, meaning the resulting performance is often inferior to XNNPACK. Hence, most of the models use XNNPACK, a highly optimized and mature CPU backend that runs on both Android and iOS.
+
+### Does this library support XNNPACK and Core ML?
+
+Yes, all of the backends are linked, so the only thing that needs to be done on your end is to export the model with the backend you're interested in using.
diff --git a/docs/versioned_docs/version-0.6.0/01-fundamentals/_category_.json b/docs/versioned_docs/version-0.6.0/01-fundamentals/_category_.json
new file mode 100644
index 000000000..e3fddcbeb
--- /dev/null
+++ b/docs/versioned_docs/version-0.6.0/01-fundamentals/_category_.json
@@ -0,0 +1,6 @@
+{
+  "label": "Fundamentals",
+  "link": {
+    "type": "generated-index"
+  }
+}
diff --git a/docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/_category_.json b/docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/_category_.json
new file mode 100644
index 000000000..0314f315d
--- /dev/null
+++ b/docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/_category_.json
@@ -0,0 +1,6 @@
+{
+  "label": "Natural Language Processing",
+  "link": {
+    "type": "generated-index"
+  }
+}
diff --git a/docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/useLLM.md b/docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/useLLM.md
new file mode 100644
index 000000000..3f072f93c
--- /dev/null
+++ b/docs/versioned_docs/version-0.6.0/02-hooks/01-natural-language-processing/useLLM.md
@@ -0,0 +1,537 @@
+---
+title: useLLM
+keywords:
+  [
+    react native,
+    react native ai,
+    react native llm,
+    react native qwen,
+    react native llama,
+    react native executorch,
+    executorch,
+    pytorch,
+    on-device ai,
+    mobile ai,
+    llama 3,
+    qwen,
+    text generation,
+    tool calling,
+    function calling,
+  ]
+description: "Learn how to use LLMs in your React Native applications with React Native ExecuTorch's useLLM hook."
+---
+
+React Native ExecuTorch supports a variety of LLMs, including Llama 3.2 (check out our [HuggingFace repository](https://huggingface.co/software-mansion) for models already converted to the ExecuTorch format). Before getting started, you'll need to obtain the `.pte` binary (a serialized model) along with the tokenizer and tokenizer config JSON files. There are various ways to accomplish this:
+
+- For your convenience, it's best to use the models exported by us; you can get them from our [HuggingFace repository](https://huggingface.co/software-mansion). You can also use the [constants](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/constants/modelUrls.ts) shipped with our library.
+- Follow the official [tutorial](https://github.com/pytorch/executorch/blob/release/0.7/examples/demo-apps/android/LlamaDemo/docs/delegates/xnnpack_README.md) made by the ExecuTorch team to build the model and tokenizer yourself.
+
+:::danger
+Lower-end devices might not be able to fit LLMs into memory. We recommend using quantized models to reduce the memory footprint.
+:::
+
+## Initializing
+
+In order to load a model into the app, you need to run the following code:
+
+```typescript
+import { useLLM, LLAMA3_2_1B } from 'react-native-executorch';
+
+const llm = useLLM({ model: LLAMA3_2_1B });
+```
+
+The code snippet above fetches the model from the specified URL, loads it into memory, and returns an object with various functions and properties for controlling the model. You can monitor the loading progress by checking the `llm.downloadProgress` and `llm.isReady` properties, and if anything goes wrong, the `llm.error` property will contain the error message.
+
+### Arguments
+
+**`model`** - Object containing the model source, tokenizer source, and tokenizer config source.
+
+- **`modelSource`** - `ResourceSource` that specifies the location of the model binary.
+
+- **`tokenizerSource`** - `ResourceSource` pointing to the JSON file which contains the tokenizer.
+
+- **`tokenizerConfigSource`** - `ResourceSource` pointing to the JSON file which contains the tokenizer config.
+
+**`preventLoad?`** - Boolean that can prevent automatic model loading (and downloading the data, if you're loading it for the first time) after running the hook.
+
+For more information on loading resources, take a look at the [loading models](../../01-fundamentals/02-loading-models.md) page.
+
+### Returns
+
+| Field | Type | Description |
+| ----- | ---- | ----------- |
+| `generate()` | `(messages: Message[], tools?: LLMTool[]) => Promise<void>` | Runs the model to complete the chat passed in the `messages` argument. It doesn't manage conversation context. |
+| `interrupt()` | `() => void` | Interrupts the current inference. |
+| `response` | `string` | State of the generated response. This field is updated with each token generated by the model. |
+| `token` | `string` | The most recently generated token. |
+| `isReady` | `boolean` | Indicates whether the model is ready. |
+| `isGenerating` | `boolean` | Indicates whether the model is currently generating a response. |
+| `downloadProgress` | `number` | Represents the download progress as a value between 0 and 1, indicating the extent of the model file retrieval. |
+| `error` | `string \| null` | Contains the error message if the model failed to load. |
+| `configure` | `({chatConfig?: Partial<ChatConfig>, toolsConfig?: ToolsConfig, generationConfig?: GenerationConfig}) => void` | Configures chat and tool calling. See more details in [configuring the model](#configuring-the-model). |
+| `sendMessage` | `(message: string) => Promise<void>` | Adds a user message to the conversation. After the model responds, `messageHistory` will be updated with both the user message and the model response. |
+| `deleteMessage` | `(index: number) => void` | Deletes all messages starting from position `index`. After deletion, `messageHistory` will be updated. |
+| `messageHistory` | `Message[]` | History containing all messages in the conversation. This field is updated after the model responds to `sendMessage`. |
+| `getGeneratedTokenCount` | `() => number` | Returns the number of tokens generated in the last response. |
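+As a quick preview of how these pieces fit together, here is a minimal sketch of a component that streams a response into the UI (the component and prompt are illustrative only):
+
+```tsx
+import React from 'react';
+import { Button, Text, View } from 'react-native';
+import { useLLM, LLAMA3_2_1B } from 'react-native-executorch';
+
+export function AnswerPreview() {
+  const llm = useLLM({ model: LLAMA3_2_1B });
+
+  return (
+    <View>
+      <Button
+        title="Ask"
+        disabled={!llm.isReady || llm.isGenerating}
+        onPress={() =>
+          llm.generate([{ role: 'user', content: 'Tell me a joke.' }])
+        }
+      />
+      {/* `response` grows token by token while `isGenerating` is true */}
+      <Text>{llm.response}</Text>
+      {llm.isGenerating && <Button title="Stop" onPress={llm.interrupt} />}
+    </View>
+  );
+}
+```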
+<details>
+<summary>Type definitions</summary>
+
+```typescript
+const useLLM: ({
+  model,
+  preventLoad,
+}: {
+  model: {
+    modelSource: ResourceSource;
+    tokenizerSource: ResourceSource;
+    tokenizerConfigSource: ResourceSource;
+  };
+  preventLoad?: boolean;
+}) => LLMType;
+
+interface LLMType {
+  messageHistory: Message[];
+  response: string;
+  token: string;
+  isReady: boolean;
+  isGenerating: boolean;
+  downloadProgress: number;
+  error: string | null;
+  configure: ({
+    chatConfig,
+    toolsConfig,
+    generationConfig,
+  }: {
+    chatConfig?: Partial<ChatConfig>;
+    toolsConfig?: ToolsConfig;
+    generationConfig?: GenerationConfig;
+  }) => void;
+  getGeneratedTokenCount: () => number;
+  generate: (messages: Message[], tools?: LLMTool[]) => Promise<void>;
+  sendMessage: (message: string) => Promise<void>;
+  deleteMessage: (index: number) => void;
+  interrupt: () => void;
+}
+
+type ResourceSource = string | number | object;
+
+type MessageRole = 'user' | 'assistant' | 'system';
+
+interface Message {
+  role: MessageRole;
+  content: string;
+}
+
+interface ChatConfig {
+  initialMessageHistory: Message[];
+  contextWindowLength: number;
+  systemPrompt: string;
+}
+
+interface GenerationConfig {
+  temperature?: number;
+  topp?: number;
+  outputTokenBatchSize?: number;
+  batchTimeInterval?: number;
+}
+
+// tool calling
+interface ToolsConfig {
+  tools: LLMTool[];
+  executeToolCallback: (call: ToolCall) => Promise<string | null>;
+  displayToolCalls?: boolean;
+}
+
+interface ToolCall {
+  toolName: string;
+  arguments: Object;
+}
+
+type LLMTool = Object;
+```
+
+</details>
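+For instance, using the types above, a configured chat could look like this (a sketch with illustrative values; see [configuring the model](#configuring-the-model) for the full details):
+
+```typescript
+llm.configure({
+  chatConfig: {
+    systemPrompt: 'You are a concise, helpful assistant.',
+    // Assumed here to be the number of past messages kept in context.
+    contextWindowLength: 6,
+  },
+  generationConfig: {
+    temperature: 0.7,
+    topp: 0.9,
+  },
+});
+```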
+
+## Functional vs managed
+
+You can use the functions returned from this hook in two ways:
+
+1. Functional/pure - we do not keep any state for you. You'll need to keep the conversation history and handle function calling yourself. Use `generate` (and rarely `forward`) together with `response`. Note that you don't need to run `configure` to use those; furthermore, `chatConfig` and `toolsConfig` have no effect on these functions.
+
+2. Managed/stateful - we manage the conversation state. Tool calls are parsed and executed automatically once you pass the appropriate callbacks. See more at [managed LLM chat](#managed-llm-chat).
+
+## Functional way
+
+### Simple generation
+
+To perform chat completion, you can use the `generate` function. There is no return value. Instead, the `response` value is updated with each token.
+
+```tsx
+const llm = useLLM({ model: LLAMA3_2_1B });
+
+const handleGenerate = () => {
+  const chat: Message[] = [
+    { role: 'system', content: 'You are a helpful assistant' },
+    { role: 'user', content: 'Hi!' },
+    { role: 'assistant', content: 'Hi! How can I help you?' },
+    { role: 'user', content: 'What is the meaning of life?' },
+  ];
+
+  // Chat completion
+  llm.generate(chat);
+};
+
+return (