diff --git a/core/config/meta/build.go b/core/config/meta/build.go new file mode 100644 index 000000000000..b45d7c569fcb --- /dev/null +++ b/core/config/meta/build.go @@ -0,0 +1,141 @@ +package meta + +import ( + "reflect" + "sort" + "sync" +) + +var ( + cachedMetadata *ConfigMetadata + cacheMu sync.RWMutex +) + +// BuildConfigMetadata reflects on the given struct type (ModelConfig), +// merges the enrichment registry, and returns the full ConfigMetadata. +// The result is cached in memory after the first call. +func BuildConfigMetadata(modelConfigType reflect.Type) *ConfigMetadata { + cacheMu.RLock() + if cachedMetadata != nil { + cacheMu.RUnlock() + return cachedMetadata + } + cacheMu.RUnlock() + + cacheMu.Lock() + defer cacheMu.Unlock() + + // Double-check after acquiring write lock + if cachedMetadata != nil { + return cachedMetadata + } + + cachedMetadata = buildConfigMetadataUncached(modelConfigType, DefaultRegistry()) + return cachedMetadata +} + +// buildConfigMetadataUncached does the actual work without caching. +// Exported via lowercase for testability through BuildForTest. 
+func buildConfigMetadataUncached(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata { + fields := WalkModelConfig(modelConfigType) + + // Apply registry overrides + for i := range fields { + override, ok := registry[fields[i].Path] + if !ok { + continue + } + applyOverride(&fields[i], override) + } + + // Sort fields by section order then by field order + sectionOrder := make(map[string]int) + for _, s := range DefaultSections() { + sectionOrder[s.ID] = s.Order + } + + sort.SliceStable(fields, func(i, j int) bool { + si := sectionOrder[fields[i].Section] + sj := sectionOrder[fields[j].Section] + if si != sj { + return si < sj + } + return fields[i].Order < fields[j].Order + }) + + // Collect sections that actually have fields + usedSections := make(map[string]bool) + for _, f := range fields { + usedSections[f.Section] = true + } + + var sections []Section + for _, s := range DefaultSections() { + if usedSections[s.ID] { + sections = append(sections, s) + } + } + + return &ConfigMetadata{ + Sections: sections, + Fields: fields, + } +} + +// applyOverride merges non-zero override values into the field. 
+func applyOverride(f *FieldMeta, o FieldMetaOverride) { + if o.Section != "" { + f.Section = o.Section + } + if o.Label != "" { + f.Label = o.Label + } + if o.Description != "" { + f.Description = o.Description + } + if o.Component != "" { + f.Component = o.Component + } + if o.Placeholder != "" { + f.Placeholder = o.Placeholder + } + if o.Default != nil { + f.Default = o.Default + } + if o.Min != nil { + f.Min = o.Min + } + if o.Max != nil { + f.Max = o.Max + } + if o.Step != nil { + f.Step = o.Step + } + if o.Options != nil { + f.Options = o.Options + } + if o.AutocompleteProvider != "" { + f.AutocompleteProvider = o.AutocompleteProvider + } + if o.VRAMImpact { + f.VRAMImpact = true + } + if o.Advanced { + f.Advanced = true + } + if o.Order != 0 { + f.Order = o.Order + } +} + +// BuildForTest builds metadata without caching, for use in tests. +func BuildForTest(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata { + return buildConfigMetadataUncached(modelConfigType, registry) +} + +// ResetCache clears the cached metadata (useful for testing). 
+func ResetCache() { + cacheMu.Lock() + defer cacheMu.Unlock() + cachedMetadata = nil +} diff --git a/core/config/meta/build_test.go b/core/config/meta/build_test.go new file mode 100644 index 000000000000..aa9acb889a8c --- /dev/null +++ b/core/config/meta/build_test.go @@ -0,0 +1,211 @@ +package meta_test + +import ( + "reflect" + "testing" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" +) + +func TestBuildConfigMetadata(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + if len(md.Sections) == 0 { + t.Fatal("expected sections, got 0") + } + if len(md.Fields) == 0 { + t.Fatal("expected fields, got 0") + } + + // Verify sections are ordered + for i := 1; i < len(md.Sections); i++ { + if md.Sections[i].Order < md.Sections[i-1].Order { + t.Errorf("sections not ordered: %s (order=%d) before %s (order=%d)", + md.Sections[i-1].ID, md.Sections[i-1].Order, + md.Sections[i].ID, md.Sections[i].Order) + } + } +} + +func TestRegistryOverrides(t *testing.T) { + registry := map[string]meta.FieldMetaOverride{ + "name": { + Label: "My Custom Label", + Description: "Custom description", + Component: "textarea", + Order: 999, + }, + } + + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), registry) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + f, ok := byPath["name"] + if !ok { + t.Fatal("field 'name' not found") + } + if f.Label != "My Custom Label" { + t.Errorf("expected label 'My Custom Label', got %q", f.Label) + } + if f.Description != "Custom description" { + t.Errorf("expected description 'Custom description', got %q", f.Description) + } + if f.Component != "textarea" { + t.Errorf("expected component 'textarea', got %q", f.Component) + } + if f.Order != 999 { + t.Errorf("expected order 999, got %d", f.Order) + } +} + +func TestUnregisteredFieldsGetDefaults(t *testing.T) { + // Use empty 
registry - all fields should still get auto-generated metadata + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), map[string]meta.FieldMetaOverride{}) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // context_size should still exist with auto-generated label + f, ok := byPath["context_size"] + if !ok { + t.Fatal("field 'context_size' not found") + } + if f.Label == "" { + t.Error("expected auto-generated label, got empty") + } + if f.UIType != "int" { + t.Errorf("expected UIType 'int', got %q", f.UIType) + } + if f.Component == "" { + t.Error("expected auto-generated component, got empty") + } +} + +func TestDefaultRegistryOverridesApply(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Verify enriched fields got their overrides + tests := []struct { + path string + label string + description string + vramImpact bool + }{ + {"context_size", "Context Size", "Maximum context window in tokens", true}, + {"gpu_layers", "GPU Layers", "Number of layers to offload to GPU (-1 = all)", true}, + {"backend", "Backend", "The inference backend to use (e.g. 
llama-cpp, vllm, diffusers)", false}, + {"parameters.temperature", "Temperature", "Sampling temperature (higher = more creative, lower = more deterministic)", false}, + {"template.chat", "Chat Template", "Go template for chat completion requests", false}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Label != tt.label { + t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label) + } + if f.Description != tt.description { + t.Errorf("field %q: expected description %q, got %q", tt.path, tt.description, f.Description) + } + if f.VRAMImpact != tt.vramImpact { + t.Errorf("field %q: expected vramImpact=%v, got %v", tt.path, tt.vramImpact, f.VRAMImpact) + } + } +} + +func TestStaticOptionsFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Fields with static options should have Options populated and no AutocompleteProvider + staticFields := []string{"quantization", "cache_type_k", "cache_type_v", "diffusers.pipeline_type", "diffusers.scheduler_type"} + for _, path := range staticFields { + f, ok := byPath[path] + if !ok { + t.Errorf("field %q not found", path) + continue + } + if len(f.Options) == 0 { + t.Errorf("field %q: expected Options to be populated", path) + } + if f.AutocompleteProvider != "" { + t.Errorf("field %q: expected no AutocompleteProvider, got %q", path, f.AutocompleteProvider) + } + } +} + +func TestDynamicProviderFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Fields with dynamic providers should have AutocompleteProvider and no Options + dynamicFields := map[string]string{ + 
"backend": meta.ProviderBackends, + "pipeline.llm": meta.ProviderModelsChat, + "pipeline.tts": meta.ProviderModelsTTS, + "pipeline.transcription": meta.ProviderModelsTranscript, + "pipeline.vad": meta.ProviderModelsVAD, + } + for path, expectedProvider := range dynamicFields { + f, ok := byPath[path] + if !ok { + t.Errorf("field %q not found", path) + continue + } + if f.AutocompleteProvider != expectedProvider { + t.Errorf("field %q: expected AutocompleteProvider %q, got %q", path, expectedProvider, f.AutocompleteProvider) + } + if len(f.Options) != 0 { + t.Errorf("field %q: expected no Options, got %d", path, len(f.Options)) + } + } +} + +func TestVRAMImpactFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + var vramFields []string + for _, f := range md.Fields { + if f.VRAMImpact { + vramFields = append(vramFields, f.Path) + } + } + + if len(vramFields) == 0 { + t.Error("expected some VRAM impact fields, got 0") + } + + // context_size and gpu_layers should be marked + expected := map[string]bool{"context_size": true, "gpu_layers": true} + for _, path := range vramFields { + if expected[path] { + delete(expected, path) + } + } + for path := range expected { + t.Errorf("expected VRAM impact field %q not found", path) + } +} diff --git a/core/config/meta/constants.go b/core/config/meta/constants.go new file mode 100644 index 000000000000..24e24015fb49 --- /dev/null +++ b/core/config/meta/constants.go @@ -0,0 +1,63 @@ +package meta + +// Dynamic autocomplete provider constants (runtime lookup required). +const ( + ProviderBackends = "backends" + ProviderModels = "models" + ProviderModelsChat = "models:chat" + ProviderModelsTTS = "models:tts" + ProviderModelsTranscript = "models:transcript" + ProviderModelsVAD = "models:vad" +) + +// Static option lists embedded directly in field metadata. 
// QuantizationOptions enumerates llama.cpp-style weight quantization
// formats offered for the "quantization" field.
var QuantizationOptions = []FieldOption{
	{Value: "q4_0", Label: "Q4_0"},
	{Value: "q4_1", Label: "Q4_1"},
	{Value: "q5_0", Label: "Q5_0"},
	{Value: "q5_1", Label: "Q5_1"},
	{Value: "q8_0", Label: "Q8_0"},
	{Value: "q2_K", Label: "Q2_K"},
	{Value: "q3_K_S", Label: "Q3_K_S"},
	{Value: "q3_K_M", Label: "Q3_K_M"},
	{Value: "q3_K_L", Label: "Q3_K_L"},
	{Value: "q4_K_S", Label: "Q4_K_S"},
	{Value: "q4_K_M", Label: "Q4_K_M"},
	{Value: "q5_K_S", Label: "Q5_K_S"},
	{Value: "q5_K_M", Label: "Q5_K_M"},
	{Value: "q6_K", Label: "Q6_K"},
}

// CacheTypeOptions enumerates KV-cache element types used by both the
// "cache_type_k" and "cache_type_v" fields.
var CacheTypeOptions = []FieldOption{
	{Value: "f16", Label: "F16"},
	{Value: "f32", Label: "F32"},
	{Value: "q8_0", Label: "Q8_0"},
	{Value: "q4_0", Label: "Q4_0"},
	{Value: "q4_1", Label: "Q4_1"},
	{Value: "q5_0", Label: "Q5_0"},
	{Value: "q5_1", Label: "Q5_1"},
}

// DiffusersPipelineOptions enumerates the supported diffusers pipeline
// class names for "diffusers.pipeline_type".
var DiffusersPipelineOptions = []FieldOption{
	{Value: "StableDiffusionPipeline", Label: "StableDiffusionPipeline"},
	{Value: "StableDiffusionImg2ImgPipeline", Label: "StableDiffusionImg2ImgPipeline"},
	{Value: "StableDiffusionXLPipeline", Label: "StableDiffusionXLPipeline"},
	{Value: "StableDiffusionXLImg2ImgPipeline", Label: "StableDiffusionXLImg2ImgPipeline"},
	{Value: "StableDiffusionDepth2ImgPipeline", Label: "StableDiffusionDepth2ImgPipeline"},
	{Value: "DiffusionPipeline", Label: "DiffusionPipeline"},
	{Value: "StableVideoDiffusionPipeline", Label: "StableVideoDiffusionPipeline"},
}

// DiffusersSchedulerOptions enumerates noise scheduler identifiers for
// "diffusers.scheduler_type".
var DiffusersSchedulerOptions = []FieldOption{
	{Value: "ddim", Label: "DDIM"},
	{Value: "ddpm", Label: "DDPM"},
	{Value: "pndm", Label: "PNDM"},
	{Value: "lms", Label: "LMS"},
	{Value: "euler", Label: "Euler"},
	{Value: "euler_a", Label: "Euler A"},
	{Value: "dpm_multistep", Label: "DPM Multistep"},
	{Value: "dpm_singlestep", Label: "DPM Singlestep"},
	{Value: "heun", Label: "Heun"},
	{Value: "unipc", Label: "UniPC"},
}
diff --git a/core/config/meta/reflect.go b/core/config/meta/reflect.go new file mode 100644 index 
000000000000..ef1d0b4b07ad --- /dev/null +++ b/core/config/meta/reflect.go @@ -0,0 +1,259 @@
package meta

import (
	"reflect"
	"strings"
	"unicode"
)

// WalkModelConfig uses reflection to discover all exported, YAML-tagged fields
// in the given struct type (expected to be config.ModelConfig) and returns a
// slice of FieldMeta with sensible defaults derived from the type information.
// A pointer type is dereferenced before walking.
func WalkModelConfig(t reflect.Type) []FieldMeta {
	if t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	var fields []FieldMeta
	walkStruct(t, "", "", &fields)
	return fields
}

// walkStruct recursively walks a struct type, collecting FieldMeta entries.
// prefix is the dot-path prefix for nested structs (e.g. "function.grammar.").
// parentYAMLPrefix is described as supporting inline embedding with a key
// prefix (e.g. "parameters.") — NOTE(review): it is currently only passed
// through recursive calls and never used to build a path; confirm whether it
// can be dropped.
func walkStruct(t reflect.Type, prefix, parentYAMLPrefix string, out *[]FieldMeta) {
	if t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	if t.Kind() != reflect.Struct {
		return
	}

	// Range-over-int (Go 1.22+): visit every declared struct field.
	for i := range t.NumField() {
		sf := t.Field(i)

		// Skip unexported fields — they cannot carry yaml values anyway.
		if !sf.IsExported() {
			continue
		}

		yamlTag := sf.Tag.Get("yaml")
		// yaml:"-" explicitly opts the field out.
		if yamlTag == "-" {
			continue
		}

		yamlKey, opts := parseTag(yamlTag)

		// Handle inline embedding (e.g. LLMConfig `yaml:",inline"`): the
		// nested struct's keys appear at the current level, so recurse
		// without extending the prefix.
		if opts.contains("inline") {
			ft := sf.Type
			if ft.Kind() == reflect.Pointer {
				ft = ft.Elem()
			}
			if ft.Kind() == reflect.Struct {
				walkStruct(ft, prefix, parentYAMLPrefix, out)
			}
			continue
		}

		// If no yaml key and it's an embedded struct without inline, skip unknown pattern
		if yamlKey == "" {
			ft := sf.Type
			if ft.Kind() == reflect.Pointer {
				ft = ft.Elem()
			}
			// Anonymous struct without yaml tag - treat as inline
			if sf.Anonymous && ft.Kind() == reflect.Struct {
				walkStruct(ft, prefix, parentYAMLPrefix, out)
				continue
			}
			// Named field without yaml tag - skip
			continue
		}

		ft := sf.Type
		isPtr := ft.Kind() == reflect.Pointer
		if isPtr {
			ft = ft.Elem()
		}

		// Named nested struct (not a special leaf type) -> recurse with a
		// dotted prefix, e.g. "diffusers." + "pipeline_type".
		if ft.Kind() == reflect.Struct && !isSpecialType(ft) {
			nestedPrefix := prefix + yamlKey + "."
			walkStruct(ft, nestedPrefix, "", out)
			continue
		}

		// Leaf field: record metadata with type-inferred defaults.
		path := prefix + yamlKey
		goType := sf.Type.String()
		uiType, component := inferUIType(sf.Type)
		section := inferSection(prefix)
		label := labelFromKey(yamlKey)

		*out = append(*out, FieldMeta{
			Path:      path,
			YAMLKey:   yamlKey,
			GoType:    goType,
			UIType:    uiType,
			Pointer:   isPtr,
			Section:   section,
			Label:     label,
			Component: component,
			Order:     len(*out), // discovery order; registry overrides may replace it
		})
	}
}

// isSpecialType returns true for struct types that should be treated as leaf
// values rather than recursed into (e.g. custom JSON marshalers).
func isSpecialType(t reflect.Type) bool {
	if t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	name := t.Name()
	// LogprobsValue, URI types are leaf values despite being structs
	switch name {
	case "LogprobsValue", "URI":
		return true
	}
	return false
}

// inferUIType maps a Go reflect.Type to a UI type string and default component.
+func inferUIType(t reflect.Type) (uiType, component string) { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + + switch t.Kind() { + case reflect.Bool: + return "bool", "toggle" + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return "int", "number" + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + return "int", "number" + case reflect.Float32, reflect.Float64: + return "float", "number" + case reflect.String: + return "string", "input" + case reflect.Slice: + elem := t.Elem() + if elem.Kind() == reflect.String { + return "[]string", "string-list" + } + if elem.Kind() == reflect.Pointer { + elem = elem.Elem() + } + if elem.Kind() == reflect.Struct { + return "[]object", "json-editor" + } + return "[]any", "json-editor" + case reflect.Map: + return "map", "map-editor" + case reflect.Struct: + // Special types treated as leaves + if isSpecialType(t) { + return "bool", "toggle" // LogprobsValue + } + return "object", "json-editor" + default: + return "any", "input" + } +} + +// inferSection determines the config section from the dot-path prefix. +func inferSection(prefix string) string { + if prefix == "" { + return "general" + } + // Remove trailing dot + p := strings.TrimSuffix(prefix, ".") + + // Use the top-level prefix to determine section + parts := strings.SplitN(p, ".", 2) + top := parts[0] + + switch top { + case "parameters": + return "parameters" + case "template": + return "templates" + case "function": + return "functions" + case "reasoning": + return "reasoning" + case "diffusers": + return "diffusers" + case "tts": + return "tts" + case "pipeline": + return "pipeline" + case "grpc": + return "grpc" + case "agent": + return "agent" + case "mcp": + return "mcp" + case "feature_flags": + return "other" + case "limit_mm_per_prompt": + return "llm" + default: + return "other" + } +} + +// labelFromKey converts a yaml key like "context_size" to "Context Size". 
+func labelFromKey(key string) string { + parts := strings.Split(key, "_") + for i, p := range parts { + if len(p) > 0 { + runes := []rune(p) + runes[0] = unicode.ToUpper(runes[0]) + parts[i] = string(runes) + } + } + return strings.Join(parts, " ") +} + +// tagOptions is a set of comma-separated yaml tag options. +type tagOptions string + +func (o tagOptions) contains(optName string) bool { + s := string(o) + for s != "" { + var name string + if name, s, _ = strings.Cut(s, ","); name == optName { + return true + } + } + return false +} + +// parseTag splits a yaml struct tag into the key name and options. +func parseTag(tag string) (string, tagOptions) { + if tag == "" { + return "", "" + } + before, after, found := strings.Cut(tag, ",") + if found { + return before, tagOptions(after) + } + return tag, "" +} + +// SectionForPath returns the section ID for a given dot-path. +// Exported so tests and the registry can use it. +func SectionForPath(path string) string { + before, _, found := strings.Cut(path, ".") + if !found { + return "general" + } + return inferSection(before + ".") +} + +// GoTypeName returns a human-readable Go type string for display. 
+func GoTypeName(t reflect.Type) string { + return t.String() +} diff --git a/core/config/meta/reflect_test.go b/core/config/meta/reflect_test.go new file mode 100644 index 000000000000..408bb2a1ecc3 --- /dev/null +++ b/core/config/meta/reflect_test.go @@ -0,0 +1,208 @@ +package meta_test + +import ( + "reflect" + "testing" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" +) + +func TestWalkModelConfig(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + if len(fields) == 0 { + t.Fatal("expected fields from ModelConfig, got 0") + } + + // Build a lookup by path + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + // Verify some top-level fields exist + for _, path := range []string{"name", "backend", "cuda", "step"} { + if _, ok := byPath[path]; !ok { + t.Errorf("expected field %q not found", path) + } + } + + // Verify inline LLMConfig fields appear at top level (no prefix) + for _, path := range []string{"context_size", "gpu_layers", "threads", "mmap"} { + if _, ok := byPath[path]; !ok { + t.Errorf("expected inline LLMConfig field %q not found", path) + } + } + + // Verify nested struct fields have correct prefix + for _, path := range []string{ + "template.chat", + "template.completion", + "template.use_tokenizer_template", + "function.grammar.parallel_calls", + "function.grammar.mixed_mode", + "diffusers.pipeline_type", + "diffusers.cuda", + "pipeline.llm", + "pipeline.tts", + "reasoning.disable", + "agent.max_iterations", + "grpc.attempts", + } { + if _, ok := byPath[path]; !ok { + t.Errorf("expected nested field %q not found", path) + } + } + + // Verify PredictionOptions fields have parameters. 
prefix + for _, path := range []string{ + "parameters.temperature", + "parameters.top_p", + "parameters.top_k", + "parameters.max_tokens", + "parameters.seed", + } { + if _, ok := byPath[path]; !ok { + t.Errorf("expected parameters field %q not found", path) + } + } + + // Verify TTSConfig fields have tts. prefix + if _, ok := byPath["tts.voice"]; !ok { + t.Error("expected tts.voice field not found") + } +} + +func TestSkipsYAMLDashFields(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + // modelConfigFile has yaml:"-" tag, should be skipped + for _, f := range fields { + if f.Path == "modelConfigFile" || f.Path == "modelTemplate" { + t.Errorf("field %q should have been skipped (yaml:\"-\")", f.Path) + } + } +} + +func TestTypeMapping(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + uiType string + pointer bool + }{ + {"name", "string", false}, + {"cuda", "bool", false}, + {"context_size", "int", true}, + {"gpu_layers", "int", true}, + {"threads", "int", true}, + {"f16", "bool", true}, + {"mmap", "bool", true}, + {"stopwords", "[]string", false}, + {"roles", "map", false}, + {"parameters.temperature", "float", true}, + {"parameters.top_k", "int", true}, + {"function.grammar.parallel_calls", "bool", false}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.UIType != tt.uiType { + t.Errorf("field %q: expected UIType %q, got %q", tt.path, tt.uiType, f.UIType) + } + if f.Pointer != tt.pointer { + t.Errorf("field %q: expected Pointer=%v, got %v", tt.path, tt.pointer, f.Pointer) + } + } +} + +func TestSectionAssignment(t *testing.T) { + fields := 
meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + section string + }{ + {"name", "general"}, + {"backend", "general"}, + {"context_size", "general"}, // inline LLMConfig -> no prefix -> general + {"parameters.temperature", "parameters"}, + {"template.chat", "templates"}, + {"function.grammar.parallel_calls", "functions"}, + {"diffusers.cuda", "diffusers"}, + {"pipeline.llm", "pipeline"}, + {"reasoning.disable", "reasoning"}, + {"agent.max_iterations", "agent"}, + {"grpc.attempts", "grpc"}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Section != tt.section { + t.Errorf("field %q: expected section %q, got %q", tt.path, tt.section, f.Section) + } + } +} + +func TestLabelGeneration(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + label string + }{ + {"context_size", "Context Size"}, + {"gpu_layers", "Gpu Layers"}, + {"name", "Name"}, + {"cuda", "Cuda"}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Label != tt.label { + t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label) + } + } +} + +func TestFieldCount(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + // We expect a large number of fields (100+) given the config complexity + if len(fields) < 80 { + t.Errorf("expected at least 80 fields, got %d", len(fields)) + } + t.Logf("Total fields discovered: %d", len(fields)) +} diff --git a/core/config/meta/registry.go b/core/config/meta/registry.go new file mode 100644 index 
000000000000..bebba468dc2d --- /dev/null +++ b/core/config/meta/registry.go @@ -0,0 +1,314 @@ +package meta + +// DefaultRegistry returns enrichment overrides for the ~30 most commonly used +// config fields. Fields not listed here still appear with auto-generated +// labels and type-inferred components. +func DefaultRegistry() map[string]FieldMetaOverride { + f64 := func(v float64) *float64 { return &v } + + return map[string]FieldMetaOverride{ + // --- General --- + "name": { + Section: "general", + Label: "Model Name", + Description: "Unique identifier for this model configuration", + Component: "input", + Order: 0, + }, + "backend": { + Section: "general", + Label: "Backend", + Description: "The inference backend to use (e.g. llama-cpp, vllm, diffusers)", + Component: "select", + AutocompleteProvider: ProviderBackends, + Order: 1, + }, + "description": { + Section: "general", + Label: "Description", + Description: "Human-readable description of what this model does", + Component: "textarea", + Order: 2, + }, + "usage": { + Section: "general", + Label: "Usage", + Description: "Usage instructions or notes", + Component: "textarea", + Advanced: true, + Order: 3, + }, + "cuda": { + Section: "general", + Label: "CUDA", + Description: "Explicitly enable CUDA acceleration", + Order: 5, + }, + "known_usecases": { + Section: "general", + Label: "Known Use Cases", + Description: "Capabilities this model supports (e.g. 
FLAG_CHAT, FLAG_COMPLETION)", + Component: "string-list", + Order: 6, + }, + + // --- LLM --- + "context_size": { + Section: "llm", + Label: "Context Size", + Description: "Maximum context window in tokens", + Component: "number", + VRAMImpact: true, + Order: 10, + }, + "gpu_layers": { + Section: "llm", + Label: "GPU Layers", + Description: "Number of layers to offload to GPU (-1 = all)", + Component: "number", + Min: f64(-1), + VRAMImpact: true, + Order: 11, + }, + "threads": { + Section: "llm", + Label: "Threads", + Description: "Number of CPU threads for inference", + Component: "number", + Min: f64(1), + Order: 12, + }, + "f16": { + Section: "llm", + Label: "F16", + Description: "Use 16-bit floating point for key/value cache", + Order: 13, + }, + "mmap": { + Section: "llm", + Label: "Memory Map", + Description: "Use memory-mapped files for model loading", + Order: 14, + }, + "mmlock": { + Section: "llm", + Label: "Memory Lock", + Description: "Lock model memory to prevent swapping", + Advanced: true, + Order: 15, + }, + "low_vram": { + Section: "llm", + Label: "Low VRAM", + Description: "Optimize for systems with limited GPU memory", + VRAMImpact: true, + Order: 16, + }, + "embeddings": { + Section: "llm", + Label: "Embeddings", + Description: "Enable embedding generation mode", + Order: 17, + }, + "quantization": { + Section: "llm", + Label: "Quantization", + Description: "Quantization method (e.g. q4_0, q5_1, q8_0)", + Component: "select", + Options: QuantizationOptions, + Advanced: true, + Order: 20, + }, + "flash_attention": { + Section: "llm", + Label: "Flash Attention", + Description: "Enable flash attention for faster inference", + Component: "input", + Advanced: true, + Order: 21, + }, + "cache_type_k": { + Section: "llm", + Label: "KV Cache Type (K)", + Description: "Quantization type for key cache (e.g. 
f16, q8_0, q4_0)", + Component: "select", + Options: CacheTypeOptions, + VRAMImpact: true, + Advanced: true, + Order: 22, + }, + "cache_type_v": { + Section: "llm", + Label: "KV Cache Type (V)", + Description: "Quantization type for value cache", + Component: "select", + Options: CacheTypeOptions, + VRAMImpact: true, + Advanced: true, + Order: 23, + }, + + // --- Parameters --- + "parameters.temperature": { + Section: "parameters", + Label: "Temperature", + Description: "Sampling temperature (higher = more creative, lower = more deterministic)", + Component: "slider", + Min: f64(0), + Max: f64(2), + Step: f64(0.05), + Order: 30, + }, + "parameters.top_p": { + Section: "parameters", + Label: "Top P", + Description: "Nucleus sampling threshold", + Component: "slider", + Min: f64(0), + Max: f64(1), + Step: f64(0.01), + Order: 31, + }, + "parameters.top_k": { + Section: "parameters", + Label: "Top K", + Description: "Top-K sampling: consider only the K most likely tokens", + Component: "number", + Min: f64(0), + Order: 32, + }, + "parameters.max_tokens": { + Section: "parameters", + Label: "Max Tokens", + Description: "Maximum number of tokens to generate (0 = unlimited)", + Component: "number", + Min: f64(0), + Order: 33, + }, + "parameters.repeat_penalty": { + Section: "parameters", + Label: "Repeat Penalty", + Description: "Penalize repeated tokens (1.0 = no penalty)", + Component: "number", + Min: f64(0), + Advanced: true, + Order: 34, + }, + "parameters.seed": { + Section: "parameters", + Label: "Seed", + Description: "Random seed (-1 = random)", + Component: "number", + Advanced: true, + Order: 35, + }, + + // --- Templates --- + "template.chat": { + Section: "templates", + Label: "Chat Template", + Description: "Go template for chat completion requests", + Component: "code-editor", + Order: 40, + }, + "template.chat_message": { + Section: "templates", + Label: "Chat Message Template", + Description: "Go template for individual chat messages", + Component: 
"code-editor", + Order: 41, + }, + "template.completion": { + Section: "templates", + Label: "Completion Template", + Description: "Go template for completion requests", + Component: "code-editor", + Order: 42, + }, + "template.use_tokenizer_template": { + Section: "templates", + Label: "Use Tokenizer Template", + Description: "Use the chat template from the model's tokenizer config", + Order: 43, + }, + + // --- Pipeline --- + "pipeline.llm": { + Section: "pipeline", + Label: "LLM Model", + Description: "Model to use for LLM inference in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsChat, + Order: 60, + }, + "pipeline.tts": { + Section: "pipeline", + Label: "TTS Model", + Description: "Model to use for text-to-speech in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsTTS, + Order: 61, + }, + "pipeline.transcription": { + Section: "pipeline", + Label: "Transcription Model", + Description: "Model to use for speech-to-text in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsTranscript, + Order: 62, + }, + "pipeline.vad": { + Section: "pipeline", + Label: "VAD Model", + Description: "Model to use for voice activity detection in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsVAD, + Order: 63, + }, + + // --- Functions --- + "function.grammar.parallel_calls": { + Section: "functions", + Label: "Parallel Calls", + Description: "Allow the LLM to return multiple function calls in one response", + Order: 70, + }, + "function.grammar.mixed_mode": { + Section: "functions", + Label: "Mixed Mode", + Description: "Allow the LLM to return both text and function calls", + Order: 71, + }, + "function.grammar.disable": { + Section: "functions", + Label: "Disable Grammar", + Description: "Disable grammar-constrained generation for function calls", + Advanced: true, + Order: 72, + }, + + // --- Diffusers --- + "diffusers.pipeline_type": { + Section: 
"diffusers", + Label: "Pipeline Type", + Description: "Diffusers pipeline type (e.g. StableDiffusionPipeline)", + Component: "select", + Options: DiffusersPipelineOptions, + Order: 80, + }, + "diffusers.scheduler_type": { + Section: "diffusers", + Label: "Scheduler Type", + Description: "Noise scheduler type", + Component: "select", + Options: DiffusersSchedulerOptions, + Order: 81, + }, + "diffusers.cuda": { + Section: "diffusers", + Label: "CUDA", + Description: "Enable CUDA for diffusers", + Order: 82, + }, + } +} diff --git a/core/config/meta/types.go b/core/config/meta/types.go new file mode 100644 index 000000000000..dcd21fb55806 --- /dev/null +++ b/core/config/meta/types.go @@ -0,0 +1,83 @@ +package meta + +// FieldMeta describes a single configuration field for UI rendering and agent discovery. +type FieldMeta struct { + Path string `json:"path"` // dot-path: "context_size", "function.grammar.parallel_calls" + YAMLKey string `json:"yaml_key"` // leaf yaml key + GoType string `json:"go_type"` // "*int", "string", "[]string" + UIType string `json:"ui_type"` // "string", "int", "float", "bool", "[]string", "map", "object" + Pointer bool `json:"pointer,omitempty"` // true = nil means "not set" + Section string `json:"section"` // "general", "llm", "templates", etc. + Label string `json:"label"` // human-readable label + Description string `json:"description,omitempty"` // help text + Component string `json:"component"` // "input", "number", "toggle", "select", "slider", etc. + Placeholder string `json:"placeholder,omitempty"` + Default any `json:"default,omitempty"` + Min *float64 `json:"min,omitempty"` + Max *float64 `json:"max,omitempty"` + Step *float64 `json:"step,omitempty"` + Options []FieldOption `json:"options,omitempty"` + + AutocompleteProvider string `json:"autocomplete_provider,omitempty"` // "backends", "models:chat", etc. 
+ VRAMImpact bool `json:"vram_impact,omitempty"` + Advanced bool `json:"advanced,omitempty"` + Order int `json:"order"` +} + +// FieldOption represents a choice in a select/enum field. +type FieldOption struct { + Value string `json:"value"` + Label string `json:"label"` +} + +// Section groups related fields in the UI. +type Section struct { + ID string `json:"id"` + Label string `json:"label"` + Icon string `json:"icon,omitempty"` + Order int `json:"order"` +} + +// ConfigMetadata is the top-level response for the metadata API. +type ConfigMetadata struct { + Sections []Section `json:"sections"` + Fields []FieldMeta `json:"fields"` +} + +// FieldMetaOverride holds registry overrides that are merged on top of +// the reflection-discovered defaults. Only non-zero fields override. +type FieldMetaOverride struct { + Section string + Label string + Description string + Component string + Placeholder string + Default any + Min *float64 + Max *float64 + Step *float64 + Options []FieldOption + AutocompleteProvider string + VRAMImpact bool + Advanced bool + Order int +} + +// DefaultSections defines the well-known config sections in display order. 
+func DefaultSections() []Section { + return []Section{ + {ID: "general", Label: "General", Icon: "settings", Order: 0}, + {ID: "llm", Label: "LLM", Icon: "cpu", Order: 10}, + {ID: "parameters", Label: "Parameters", Icon: "sliders", Order: 20}, + {ID: "templates", Label: "Templates", Icon: "file-text", Order: 30}, + {ID: "functions", Label: "Functions / Tools", Icon: "tool", Order: 40}, + {ID: "reasoning", Label: "Reasoning", Icon: "brain", Order: 45}, + {ID: "diffusers", Label: "Diffusers", Icon: "image", Order: 50}, + {ID: "tts", Label: "TTS", Icon: "volume-2", Order: 55}, + {ID: "pipeline", Label: "Pipeline", Icon: "git-merge", Order: 60}, + {ID: "grpc", Label: "gRPC", Icon: "server", Order: 65}, + {ID: "agent", Label: "Agent", Icon: "bot", Order: 70}, + {ID: "mcp", Label: "MCP", Icon: "plug", Order: 75}, + {ID: "other", Label: "Other", Icon: "more-horizontal", Order: 100}, + } +} diff --git a/core/http/app.go b/core/http/app.go index 94f36c89ddfb..bfedfc34dce8 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -50,6 +50,36 @@ var quietPaths = []string{"/api/operations", "/api/resources", "/healthz", "/rea // @securityDefinitions.apikey BearerAuth // @in header // @name Authorization +// @tag.name inference +// @tag.description Chat completions, text completions, edits, and responses (OpenAI-compatible) +// @tag.name embeddings +// @tag.description Vector embeddings (OpenAI-compatible) +// @tag.name audio +// @tag.description Text-to-speech, transcription, voice activity detection, sound generation +// @tag.name images +// @tag.description Image generation and inpainting +// @tag.name video +// @tag.description Video generation from prompts +// @tag.name detection +// @tag.description Object detection in images +// @tag.name tokenize +// @tag.description Tokenization and token metrics +// @tag.name models +// @tag.description Model gallery browsing, installation, deletion, and listing +// @tag.name backends +// @tag.description Backend gallery browsing, 
installation, deletion, and listing +// @tag.name config +// @tag.description Model configuration metadata, autocomplete, PATCH updates, VRAM estimation +// @tag.name monitoring +// @tag.description Prometheus metrics, backend status, system information +// @tag.name mcp +// @tag.description Model Context Protocol — tool-augmented chat with MCP servers +// @tag.name agent-jobs +// @tag.description Agent task and job management +// @tag.name p2p +// @tag.description Peer-to-peer networking nodes and tokens +// @tag.name rerank +// @tag.description Document reranking func API(application *application.Application) (*echo.Echo, error) { e := echo.New() diff --git a/core/http/endpoints/anthropic/messages.go b/core/http/endpoints/anthropic/messages.go index adb4b989f5b1..f7a65c93e110 100644 --- a/core/http/endpoints/anthropic/messages.go +++ b/core/http/endpoints/anthropic/messages.go @@ -22,6 +22,7 @@ import ( // MessagesEndpoint is the Anthropic Messages API endpoint // https://docs.anthropic.com/claude/reference/messages_post // @Summary Generate a message response for the given messages and model. +// @Tags inference // @Param request body schema.AnthropicRequest true "query params" // @Success 200 {object} schema.AnthropicResponse "Response" // @Router /v1/messages [post] diff --git a/core/http/endpoints/elevenlabs/soundgeneration.go b/core/http/endpoints/elevenlabs/soundgeneration.go index fa1803649bc8..7034ea042338 100644 --- a/core/http/endpoints/elevenlabs/soundgeneration.go +++ b/core/http/endpoints/elevenlabs/soundgeneration.go @@ -15,6 +15,7 @@ import ( // SoundGenerationEndpoint is the ElevenLabs SoundGeneration endpoint https://elevenlabs.io/docs/api-reference/sound-generation // @Summary Generates audio from the input text. 
+// @Tags audio // @Param request body schema.ElevenLabsSoundGenerationRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/sound-generation [post] diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index ff859b04d340..3fc8c8f07602 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -15,6 +15,7 @@ import ( // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech // @Summary Generates audio from the input text. +// @Tags audio // @Param voice-id path string true "Account ID" // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" diff --git a/core/http/endpoints/jina/rerank.go b/core/http/endpoints/jina/rerank.go index 330fb94a4396..6dabd35f6336 100644 --- a/core/http/endpoints/jina/rerank.go +++ b/core/http/endpoints/jina/rerank.go @@ -15,6 +15,7 @@ import ( // JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/) // @Summary Reranks a list of phrases by relevance to a given text query. +// @Tags rerank // @Param request body schema.JINARerankRequest true "query params" // @Success 200 {object} schema.JINARerankResponse "Response" // @Router /v1/rerank [post] diff --git a/core/http/endpoints/localai/agent_jobs.go b/core/http/endpoints/localai/agent_jobs.go index 8ed20d7df446..292a01a0c58c 100644 --- a/core/http/endpoints/localai/agent_jobs.go +++ b/core/http/endpoints/localai/agent_jobs.go @@ -29,6 +29,15 @@ func getJobService(app *application.Application, c echo.Context) *services.Agent return jobSvc } +// CreateTaskEndpoint creates a new agent task definition. 
+// @Summary Create a new agent task +// @Tags agent-jobs +// @Accept json +// @Produce json +// @Param request body schema.Task true "Task definition" +// @Success 201 {object} map[string]string "id" +// @Failure 400 {object} map[string]string "error" +// @Router /api/agent/tasks [post] func CreateTaskEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { var task schema.Task @@ -45,6 +54,17 @@ func CreateTaskEndpoint(app *application.Application) echo.HandlerFunc { } } +// UpdateTaskEndpoint updates an existing agent task. +// @Summary Update an agent task +// @Tags agent-jobs +// @Accept json +// @Produce json +// @Param id path string true "Task ID" +// @Param request body schema.Task true "Updated task definition" +// @Success 200 {object} map[string]string "message" +// @Failure 400 {object} map[string]string "error" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/tasks/{id} [put] func UpdateTaskEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -64,6 +84,14 @@ func UpdateTaskEndpoint(app *application.Application) echo.HandlerFunc { } } +// DeleteTaskEndpoint deletes an agent task. +// @Summary Delete an agent task +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Task ID" +// @Success 200 {object} map[string]string "message" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/tasks/{id} [delete] func DeleteTaskEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -78,6 +106,13 @@ func DeleteTaskEndpoint(app *application.Application) echo.HandlerFunc { } } +// ListTasksEndpoint lists all agent tasks for the current user. 
+// @Summary List agent tasks +// @Tags agent-jobs +// @Produce json +// @Param all_users query string false "Set to 'true' for admin cross-user listing" +// @Success 200 {object} []schema.Task "tasks" +// @Router /api/agent/tasks [get] func ListTasksEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { jobSvc := getJobService(app, c) @@ -120,6 +155,14 @@ func ListTasksEndpoint(app *application.Application) echo.HandlerFunc { } } +// GetTaskEndpoint returns a single agent task by ID. +// @Summary Get an agent task +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Task ID" +// @Success 200 {object} schema.Task "task" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/tasks/{id} [get] func GetTaskEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -132,6 +175,15 @@ func GetTaskEndpoint(app *application.Application) echo.HandlerFunc { } } +// ExecuteJobEndpoint creates and runs a new job for a task. +// @Summary Execute an agent job +// @Tags agent-jobs +// @Accept json +// @Produce json +// @Param request body schema.JobExecutionRequest true "Job execution request" +// @Success 201 {object} schema.JobExecutionResponse "job created" +// @Failure 400 {object} map[string]string "error" +// @Router /api/agent/jobs/execute [post] func ExecuteJobEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { var req schema.JobExecutionRequest @@ -167,6 +219,14 @@ func ExecuteJobEndpoint(app *application.Application) echo.HandlerFunc { } } +// GetJobEndpoint returns a single job by ID. 
+// @Summary Get an agent job +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Job ID" +// @Success 200 {object} schema.Job "job" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/jobs/{id} [get] func GetJobEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -179,6 +239,16 @@ func GetJobEndpoint(app *application.Application) echo.HandlerFunc { } } +// ListJobsEndpoint lists jobs, optionally filtered by task or status. +// @Summary List agent jobs +// @Tags agent-jobs +// @Produce json +// @Param task_id query string false "Filter by task ID" +// @Param status query string false "Filter by status (pending, running, completed, failed, cancelled)" +// @Param limit query integer false "Max number of jobs to return" +// @Param all_users query string false "Set to 'true' for admin cross-user listing" +// @Success 200 {object} []schema.Job "jobs" +// @Router /api/agent/jobs [get] func ListJobsEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { var taskID *string @@ -240,6 +310,15 @@ func ListJobsEndpoint(app *application.Application) echo.HandlerFunc { } } +// CancelJobEndpoint cancels a running job. +// @Summary Cancel an agent job +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Job ID" +// @Success 200 {object} map[string]string "message" +// @Failure 400 {object} map[string]string "error" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/jobs/{id}/cancel [post] func CancelJobEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -254,6 +333,14 @@ func CancelJobEndpoint(app *application.Application) echo.HandlerFunc { } } +// DeleteJobEndpoint deletes a job by ID. 
+// @Summary Delete an agent job +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Job ID" +// @Success 200 {object} map[string]string "message" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/jobs/{id} [delete] func DeleteJobEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -268,6 +355,17 @@ func DeleteJobEndpoint(app *application.Application) echo.HandlerFunc { } } +// ExecuteTaskByNameEndpoint looks up a task by name and executes it. +// @Summary Execute an agent task by name +// @Tags agent-jobs +// @Accept json +// @Produce json +// @Param name path string true "Task name" +// @Param parameters body object false "Optional template parameters" +// @Success 201 {object} schema.JobExecutionResponse "job created" +// @Failure 400 {object} map[string]string "error" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/tasks/{name}/execute [post] func ExecuteTaskByNameEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { name := c.Param("name") diff --git a/core/http/endpoints/localai/api_skills.go b/core/http/endpoints/localai/api_skills.go new file mode 100644 index 000000000000..44874a8c41c2 --- /dev/null +++ b/core/http/endpoints/localai/api_skills.go @@ -0,0 +1,473 @@ +package localai + +import ( + "encoding/json" + "fmt" + "net/http" + "sort" + "strings" + "sync" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/swagger" +) + +// skillDef is a lightweight skill definition that maps to swagger tags. 
type skillDef struct {
	Name        string   `json:"name"`        // stable identifier, also the URL path segment
	Description string   `json:"description"` // one-line summary shown in the skill index
	Tags        []string `json:"tags"`        // swagger tags whose operations belong to this skill
	Intro       string   `json:"-"`           // brief context not in swagger
}

// skillDefs is the fixed catalogue of API skills. Order matters: it is the
// order in which the listing endpoint returns the skills.
var skillDefs = []skillDef{
	{
		Name:        "chat-inference",
		Description: "OpenAI-compatible chat completions, text completions, and embeddings",
		Tags:        []string{"inference", "embeddings"},
		Intro:       "Set \"stream\": true for SSE streaming. Supports tool/function calling when the model config has function templates configured.",
	},
	{
		Name:        "audio",
		Description: "Text-to-speech, voice activity detection, transcription, and sound generation",
		Tags:        []string{"audio"},
	},
	{
		Name:        "images",
		Description: "Image generation and inpainting",
		Tags:        []string{"images"},
	},
	{
		Name:        "model-management",
		Description: "Browse the gallery, install, delete, and manage models and backends",
		Tags:        []string{"models", "backends"},
	},
	{
		Name:        "config-management",
		Description: "Discover, read, and modify model configuration fields with VRAM estimation",
		Tags:        []string{"config"},
		Intro:       "Fields with static options include an \"options\" array in metadata. Fields with dynamic values have an \"autocomplete_provider\" for runtime lookup.",
	},
	{
		Name:        "monitoring",
		Description: "System metrics, backend status, and system information",
		Tags:        []string{"monitoring"},
	},
	{
		Name:        "mcp",
		Description: "Model Context Protocol — tool-augmented chat with MCP servers",
		Tags:        []string{"mcp"},
		Intro:       "The model's config must define MCP servers. The endpoint handles tool execution automatically.",
	},
	{
		Name:        "agents",
		Description: "Agent task and job management for CI/automation workflows",
		Tags:        []string{"agent-jobs"},
	},
	{
		Name:        "video",
		Description: "Video generation from text prompts",
		Tags:        []string{"video"},
	},
}

// swaggerState holds parsed swagger spec data, initialised once.
+type swaggerState struct { + once sync.Once + spec map[string]any // full parsed swagger JSON + ready bool +} + +var swState swaggerState + +func (s *swaggerState) init() { + s.once.Do(func() { + var spec map[string]any + if err := json.Unmarshal(swagger.SwaggerJSON, &spec); err != nil { + return + } + s.spec = spec + s.ready = true + }) +} + +// filterSwaggerByTags returns a swagger fragment containing only paths whose +// operations carry at least one of the given tags, plus the definitions they +// reference. +func filterSwaggerByTags(spec map[string]any, tags []string) map[string]any { + tagSet := make(map[string]bool, len(tags)) + for _, t := range tags { + tagSet[t] = true + } + + paths, _ := spec["paths"].(map[string]any) + allDefs, _ := spec["definitions"].(map[string]any) + + filteredPaths := make(map[string]any) + for path, methods := range paths { + methodMap, ok := methods.(map[string]any) + if !ok { + continue + } + filteredMethods := make(map[string]any) + for method, opRaw := range methodMap { + op, ok := opRaw.(map[string]any) + if !ok { + continue + } + opTags, _ := op["tags"].([]any) + for _, t := range opTags { + if ts, ok := t.(string); ok && tagSet[ts] { + filteredMethods[method] = op + break + } + } + } + if len(filteredMethods) > 0 { + filteredPaths[path] = filteredMethods + } + } + + // Collect all $ref definitions used by the filtered paths. + neededDefs := make(map[string]bool) + collectRefs(filteredPaths, neededDefs) + + // Resolve nested refs from definitions themselves. 
+ changed := true + for changed { + changed = false + for name := range neededDefs { + if def, ok := allDefs[name]; ok { + before := len(neededDefs) + collectRefs(def, neededDefs) + if len(neededDefs) > before { + changed = true + } + } + } + } + + filteredDefs := make(map[string]any) + for name := range neededDefs { + if def, ok := allDefs[name]; ok { + filteredDefs[name] = def + } + } + + result := map[string]any{ + "paths": filteredPaths, + } + if len(filteredDefs) > 0 { + result["definitions"] = filteredDefs + } + return result +} + +// collectRefs walks a JSON structure and collects all $ref definition names. +func collectRefs(v any, refs map[string]bool) { + switch val := v.(type) { + case map[string]any: + if ref, ok := val["$ref"].(string); ok { + // "#/definitions/schema.OpenAIRequest" -> "schema.OpenAIRequest" + const prefix = "#/definitions/" + if strings.HasPrefix(ref, prefix) { + refs[ref[len(prefix):]] = true + } + } + for _, child := range val { + collectRefs(child, refs) + } + case []any: + for _, child := range val { + collectRefs(child, refs) + } + } +} + +// swaggerToMarkdown renders a filtered swagger fragment into concise markdown. +func swaggerToMarkdown(skillName, intro string, fragment map[string]any) string { + var b strings.Builder + b.WriteString("# ") + b.WriteString(skillName) + b.WriteString("\n") + if intro != "" { + b.WriteString("\n") + b.WriteString(intro) + b.WriteString("\n") + } + + paths, _ := fragment["paths"].(map[string]any) + defs, _ := fragment["definitions"].(map[string]any) + + // Sort paths for stable output. 
+ sortedPaths := make([]string, 0, len(paths)) + for p := range paths { + sortedPaths = append(sortedPaths, p) + } + sort.Strings(sortedPaths) + + for _, path := range sortedPaths { + methods, ok := paths[path].(map[string]any) + if !ok { + continue + } + sortedMethods := sortMethods(methods) + for _, method := range sortedMethods { + op, ok := methods[method].(map[string]any) + if !ok { + continue + } + summary, _ := op["summary"].(string) + b.WriteString(fmt.Sprintf("\n## %s %s\n", strings.ToUpper(method), path)) + if summary != "" { + b.WriteString(summary) + b.WriteString("\n") + } + + // Parameters + params, _ := op["parameters"].([]any) + bodyParams, nonBodyParams := splitParams(params) + + if len(nonBodyParams) > 0 { + b.WriteString("\n**Parameters:**\n") + b.WriteString("| Name | In | Type | Required | Description |\n") + b.WriteString("|------|----|------|----------|-------------|\n") + for _, p := range nonBodyParams { + pm, ok := p.(map[string]any) + if !ok { + continue + } + name, _ := pm["name"].(string) + in, _ := pm["in"].(string) + typ, _ := pm["type"].(string) + req, _ := pm["required"].(bool) + desc, _ := pm["description"].(string) + b.WriteString(fmt.Sprintf("| %s | %s | %s | %v | %s |\n", name, in, typ, req, desc)) + } + } + + if len(bodyParams) > 0 { + for _, p := range bodyParams { + pm, ok := p.(map[string]any) + if !ok { + continue + } + schema, _ := pm["schema"].(map[string]any) + refName := resolveRefName(schema) + if refName != "" { + b.WriteString(fmt.Sprintf("\n**Request body** (`%s`):\n", refName)) + renderSchemaFields(&b, refName, defs) + } + } + } + + // Responses + responses, _ := op["responses"].(map[string]any) + if len(responses) > 0 { + sortedCodes := make([]string, 0, len(responses)) + for code := range responses { + sortedCodes = append(sortedCodes, code) + } + sort.Strings(sortedCodes) + for _, code := range sortedCodes { + resp, ok := responses[code].(map[string]any) + if !ok { + continue + } + desc, _ := 
resp["description"].(string) + respSchema, _ := resp["schema"].(map[string]any) + refName := resolveRefName(respSchema) + if refName != "" { + b.WriteString(fmt.Sprintf("\n**Response %s** (`%s`): %s\n", code, refName, desc)) + renderSchemaFields(&b, refName, defs) + } else if desc != "" { + b.WriteString(fmt.Sprintf("\n**Response %s**: %s\n", code, desc)) + } + } + } + } + } + + return b.String() +} + +// sortMethods returns HTTP methods in a conventional order. +func sortMethods(methods map[string]any) []string { + order := map[string]int{"get": 0, "post": 1, "put": 2, "patch": 3, "delete": 4} + keys := make([]string, 0, len(methods)) + for k := range methods { + keys = append(keys, k) + } + sort.Slice(keys, func(i, j int) bool { + oi, oki := order[keys[i]] + oj, okj := order[keys[j]] + if !oki { + oi = 99 + } + if !okj { + oj = 99 + } + return oi < oj + }) + return keys +} + +// splitParams separates body parameters from non-body parameters. +func splitParams(params []any) (body, nonBody []any) { + for _, p := range params { + pm, ok := p.(map[string]any) + if !ok { + continue + } + if in, _ := pm["in"].(string); in == "body" { + body = append(body, p) + } else { + nonBody = append(nonBody, p) + } + } + return +} + +// resolveRefName extracts the definition name from a $ref or returns "". +func resolveRefName(schema map[string]any) string { + if schema == nil { + return "" + } + if ref, ok := schema["$ref"].(string); ok { + const prefix = "#/definitions/" + if strings.HasPrefix(ref, prefix) { + return ref[len(prefix):] + } + } + return "" +} + +// renderSchemaFields writes a markdown field table for a definition. 
+func renderSchemaFields(b *strings.Builder, defName string, defs map[string]any) { + if defs == nil { + return + } + def, ok := defs[defName].(map[string]any) + if !ok { + return + } + props, ok := def["properties"].(map[string]any) + if !ok || len(props) == 0 { + return + } + + // Sort fields + fields := make([]string, 0, len(props)) + for f := range props { + fields = append(fields, f) + } + sort.Strings(fields) + + b.WriteString("| Field | Type | Description |\n") + b.WriteString("|-------|------|-------------|\n") + for _, field := range fields { + prop, ok := props[field].(map[string]any) + if !ok { + continue + } + typ := schemaTypeString(prop) + desc, _ := prop["description"].(string) + b.WriteString(fmt.Sprintf("| %s | %s | %s |\n", field, typ, desc)) + } +} + +// schemaTypeString returns a human-readable type string for a schema property. +func schemaTypeString(prop map[string]any) string { + if ref := resolveRefName(prop); ref != "" { + return ref + } + typ, _ := prop["type"].(string) + if typ == "array" { + items, _ := prop["items"].(map[string]any) + if items != nil { + if ref := resolveRefName(items); ref != "" { + return "[]" + ref + } + it, _ := items["type"].(string) + if it != "" { + return "[]" + it + } + } + return "[]any" + } + if typ != "" { + return typ + } + return "object" +} + +// APISkillResponse is the JSON response for a single skill (?format=json). +type APISkillResponse struct { + Name string `json:"name"` + Description string `json:"description"` + Tags []string `json:"tags"` + SwaggerFragment map[string]any `json:"swagger_fragment,omitempty"` +} + +// ListAPISkillsEndpoint returns all skills (compact list without guides). 
+func ListAPISkillsEndpoint() echo.HandlerFunc { + return func(c echo.Context) error { + type compactSkill struct { + Name string `json:"name"` + Description string `json:"description"` + Tags []string `json:"tags"` + URL string `json:"url"` + } + skills := make([]compactSkill, len(skillDefs)) + for i, s := range skillDefs { + skills[i] = compactSkill{ + Name: s.Name, + Description: s.Description, + Tags: s.Tags, + URL: "/api/skills/" + s.Name, + } + } + return c.JSON(http.StatusOK, map[string]any{ + "skills": skills, + "hint": "Fetch GET {url} for a markdown API guide. Add ?format=json for a raw OpenAPI fragment.", + }) + } +} + +// GetAPISkillEndpoint returns a single skill by name. +// Query parameter ?format=json returns a filtered swagger fragment; +// default (markdown) returns a human/LLM-readable guide. +func GetAPISkillEndpoint() echo.HandlerFunc { + byName := make(map[string]*skillDef, len(skillDefs)) + for i := range skillDefs { + byName[skillDefs[i].Name] = &skillDefs[i] + } + + return func(c echo.Context) error { + name := c.Param("name") + skill, ok := byName[name] + if !ok { + return c.JSON(http.StatusNotFound, map[string]any{"error": "skill not found: " + name}) + } + + swState.init() + if !swState.ready { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "swagger spec not available"}) + } + + fragment := filterSwaggerByTags(swState.spec, skill.Tags) + + format := c.QueryParam("format") + if format == "json" { + return c.JSON(http.StatusOK, APISkillResponse{ + Name: skill.Name, + Description: skill.Description, + Tags: skill.Tags, + SwaggerFragment: fragment, + }) + } + + guide := swaggerToMarkdown(skill.Name, skill.Intro, fragment) + return c.Blob(http.StatusOK, "text/markdown; charset=utf-8", []byte(guide)) + } +} diff --git a/core/http/endpoints/localai/backend.go b/core/http/endpoints/localai/backend.go index f804f1b35c73..63e31cb5c7c5 100644 --- a/core/http/endpoints/localai/backend.go +++ 
b/core/http/endpoints/localai/backend.go @@ -37,6 +37,7 @@ func CreateBackendEndpointService(galleries []config.Gallery, systemState *syste // GetOpStatusEndpoint returns the job status // @Summary Returns the job status +// @Tags backends // @Success 200 {object} services.GalleryOpStatus "Response" // @Router /backends/jobs/{uuid} [get] func (mgs *BackendEndpointService) GetOpStatusEndpoint() echo.HandlerFunc { @@ -51,6 +52,7 @@ func (mgs *BackendEndpointService) GetOpStatusEndpoint() echo.HandlerFunc { // GetAllStatusEndpoint returns all the jobs status progress // @Summary Returns all the jobs status progress +// @Tags backends // @Success 200 {object} map[string]services.GalleryOpStatus "Response" // @Router /backends/jobs [get] func (mgs *BackendEndpointService) GetAllStatusEndpoint() echo.HandlerFunc { @@ -61,6 +63,7 @@ func (mgs *BackendEndpointService) GetAllStatusEndpoint() echo.HandlerFunc { // ApplyBackendEndpoint installs a new backend to a LocalAI instance // @Summary Install backends to LocalAI. +// @Tags backends // @Param request body GalleryBackend true "query params" // @Success 200 {object} schema.BackendResponse "Response" // @Router /backends/apply [post] @@ -88,6 +91,7 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() echo.HandlerFunc { // DeleteBackendEndpoint lets delete backends from a LocalAI instance // @Summary delete backends from LocalAI. 
+// @Tags backends
 // @Param name path string true "Backend name"
 // @Success 200 {object} schema.BackendResponse "Response"
 // @Router /backends/delete/{name} [post]
@@ -112,6 +116,7 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() echo.HandlerFunc {
 // ListBackendsEndpoint list the available backends configured in LocalAI
 // @Summary List all Backends
+// @Tags backends
 // @Success 200 {object} []gallery.GalleryBackend "Response"
 // @Router /backends [get]
 func (mgs *BackendEndpointService) ListBackendsEndpoint(systemState *system.SystemState) echo.HandlerFunc {
@@ -126,6 +131,7 @@ func (mgs *BackendEndpointService) ListBackendsEndpoint(systemState *system.Syst
 // ListBackendGalleriesEndpoint list the available galleries configured in LocalAI
 // @Summary List all Galleries
+// @Tags backends
 // @Success 200 {object} []config.Gallery "Response"
 // @Router /backends/galleries [get]
 // NOTE: This is different (and much simpler!) than above! This JUST lists the backend galleries that have been loaded, not their contents!
@@ -142,6 +148,7 @@ func (mgs *BackendEndpointService) ListBackendGalleriesEndpoint() echo.HandlerFu // ListAvailableBackendsEndpoint list the available backends in the galleries configured in LocalAI // @Summary List all available Backends +// @Tags backends // @Success 200 {object} []gallery.GalleryBackend "Response" // @Router /backends/available [get] func (mgs *BackendEndpointService) ListAvailableBackendsEndpoint(systemState *system.SystemState) echo.HandlerFunc { diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index 18016c579220..29b6f39810fe 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -8,6 +8,7 @@ import ( // BackendMonitorEndpoint returns the status of the specified backend // @Summary Backend monitor endpoint +// @Tags monitoring // @Param request body schema.BackendMonitorRequest true "Backend statistics request" // @Success 200 {object} proto.StatusResponse "Response" // @Router /backend/monitor [get] @@ -29,7 +30,8 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) echo.HandlerFunc } // BackendShutdownEndpoint shuts down the specified backend -// @Summary Backend monitor endpoint +// @Summary Backend shutdown endpoint +// @Tags monitoring // @Param request body schema.BackendMonitorRequest true "Backend statistics request" // @Router /backend/shutdown [post] func BackendShutdownEndpoint(bm *services.BackendMonitorService) echo.HandlerFunc { diff --git a/core/http/endpoints/localai/config_meta.go b/core/http/endpoints/localai/config_meta.go new file mode 100644 index 000000000000..8f9083c8fa71 --- /dev/null +++ b/core/http/endpoints/localai/config_meta.go @@ -0,0 +1,251 @@ +package localai + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "reflect" + "sort" + "strings" + + "dario.cat/mergo" + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + 
"github.com/mudler/LocalAI/core/config/meta" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/services" + "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/utils" + "github.com/mudler/xlog" + "gopkg.in/yaml.v3" +) + +// ConfigMetadataEndpoint returns field metadata for config fields. +// Without ?section, returns just the section index (lightweight). +// With ?section=, returns fields for that section only. +// With ?section=all, returns all fields grouped by section. +// @Summary List model configuration field metadata +// @Description Returns config field metadata. Use ?section= to filter by section, or omit for a section index. +// @Tags config +// @Produce json +// @Param section query string false "Section ID to filter (e.g. 'general', 'llm', 'parameters') or 'all' for everything" +// @Success 200 {object} map[string]any "Section index or filtered field metadata" +// @Router /api/models/config-metadata [get] +func ConfigMetadataEndpoint() echo.HandlerFunc { + return func(c echo.Context) error { + sectionParam := c.QueryParam("section") + + // No section param: return lightweight section index. + if sectionParam == "" { + sections := meta.DefaultSections() + type sectionInfo struct { + ID string `json:"id"` + Label string `json:"label"` + URL string `json:"url"` + } + index := make([]sectionInfo, len(sections)) + for i, s := range sections { + index[i] = sectionInfo{ + ID: s.ID, + Label: s.Label, + URL: "/api/models/config-metadata?section=" + s.ID, + } + } + return c.JSON(http.StatusOK, map[string]any{ + "hint": "Fetch a section URL to see its fields. Use ?section=all for everything.", + "sections": index, + }) + } + + md := meta.BuildConfigMetadata(reflect.TypeOf(config.ModelConfig{})) + + // section=all: return everything. + if sectionParam == "all" { + return c.JSON(http.StatusOK, md) + } + + // Filter to requested section. 
+ var filtered []meta.FieldMeta + for _, f := range md.Fields { + if f.Section == sectionParam { + filtered = append(filtered, f) + } + } + if len(filtered) == 0 { + return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown section: " + sectionParam}) + } + return c.JSON(http.StatusOK, filtered) + } +} + +// AutocompleteEndpoint handles dynamic autocomplete lookups for config fields. +// Static option lists (quantizations, cache types, diffusers pipelines/schedulers) +// are embedded directly in the field metadata Options; only truly dynamic values +// that require runtime lookup are served here. +// @Summary Get dynamic autocomplete values for a config field +// @Description Returns runtime-resolved values for dynamic providers (backends, models) +// @Tags config +// @Produce json +// @Param provider path string true "Provider name (backends, models, models:chat, models:tts, models:transcript, models:vad)" +// @Success 200 {object} map[string]any "values array" +// @Router /api/models/config-metadata/autocomplete/{provider} [get] +func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + provider := c.Param("provider") + var values []string + + switch { + case provider == meta.ProviderBackends: + installedBackends, err := gallery.ListSystemBackends(appConfig.SystemState) + if err == nil { + for name := range installedBackends { + values = append(values, name) + } + } + sort.Strings(values) + + case provider == meta.ProviderModels: + modelConfigs := cl.GetAllModelsConfigs() + for _, cfg := range modelConfigs { + values = append(values, cfg.Name) + } + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) + values = append(values, modelsWithoutConfig...) 
+ sort.Strings(values) + + case strings.HasPrefix(provider, "models:"): + capability := strings.TrimPrefix(provider, "models:") + var filterFn config.ModelConfigFilterFn + switch capability { + case "chat": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_CHAT) + case "tts": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_TTS) + case "vad": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_VAD) + case "transcript": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT) + default: + filterFn = config.NoFilterFn + } + filteredConfigs := cl.GetModelConfigsByFilter(filterFn) + for _, cfg := range filteredConfigs { + values = append(values, cfg.Name) + } + sort.Strings(values) + + default: + return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown provider: " + provider}) + } + + return c.JSON(http.StatusOK, map[string]any{"values": values}) + } +} + +// PatchConfigEndpoint handles PATCH requests to partially update a model config +// using nested JSON merge. 
+// @Summary Partially update a model configuration +// @Description Deep-merges the JSON patch body into the existing model config +// @Tags config +// @Accept json +// @Produce json +// @Param name path string true "Model name" +// @Success 200 {object} map[string]any "success message" +// @Router /api/models/config-json/{name} [patch] +func PatchConfigEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("name") + if decoded, err := url.PathUnescape(modelName); err == nil { + modelName = decoded + } + if modelName == "" { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"}) + } + + modelConfig, exists := cl.GetModelConfig(modelName) + if !exists { + return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"}) + } + + // Read patch body + patchBody, err := io.ReadAll(c.Request().Body) + if err != nil || len(patchBody) == 0 { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "request body is empty or unreadable"}) + } + + // Validate patch body is valid JSON + var patchMap map[string]any + if err := json.Unmarshal(patchBody, &patchMap); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid JSON: " + err.Error()}) + } + + // Marshal existing config to JSON + existingJSON, err := json.Marshal(modelConfig) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal existing config"}) + } + + // Deep-merge patch into existing + var existingMap map[string]any + if err := json.Unmarshal(existingJSON, &existingMap); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to parse existing config"}) + } + + if err := mergo.Merge(&existingMap, patchMap, mergo.WithOverride); err != nil { + return c.JSON(http.StatusInternalServerError, 
map[string]any{"error": "failed to merge configs: " + err.Error()}) + } + + // Marshal merged config back to JSON + mergedJSON, err := json.Marshal(existingMap) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal merged config"}) + } + + // Unmarshal to ModelConfig for validation + var updatedConfig config.ModelConfig + if err := json.Unmarshal(mergedJSON, &updatedConfig); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "merged config is invalid: " + err.Error()}) + } + + // Validate + if valid, err := updatedConfig.Validate(); !valid { + errMsg := "validation failed" + if err != nil { + errMsg = err.Error() + } + return c.JSON(http.StatusBadRequest, map[string]any{"error": errMsg}) + } + + // Write as YAML to disk + configPath := modelConfig.GetModelConfigFile() + if err := utils.VerifyPath(configPath, appConfig.SystemState.Model.ModelsPath); err != nil { + return c.JSON(http.StatusForbidden, map[string]any{"error": "config path not trusted: " + err.Error()}) + } + + yamlData, err := yaml.Marshal(updatedConfig) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal YAML"}) + } + + if err := os.WriteFile(configPath, yamlData, 0644); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to write config file"}) + } + + // Reload configs + if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to reload configs: " + err.Error()}) + } + + if err := cl.Preload(appConfig.SystemState.Model.ModelsPath); err != nil { + xlog.Warn("Failed to preload after PATCH", "error", err) + } + + return c.JSON(http.StatusOK, map[string]any{ + "success": true, + "message": fmt.Sprintf("Model '%s' updated successfully", modelName), + }) + } +} diff --git 
a/core/http/endpoints/localai/detection.go b/core/http/endpoints/localai/detection.go index 77a0c7256526..3dcbbde6d783 100644 --- a/core/http/endpoints/localai/detection.go +++ b/core/http/endpoints/localai/detection.go @@ -13,6 +13,7 @@ import ( // DetectionEndpoint is the LocalAI Detection endpoint https://localai.io/docs/api-reference/detection // @Summary Detects objects in the input image. +// @Tags detection // @Param request body schema.DetectionRequest true "query params" // @Success 200 {object} schema.DetectionResponse "Response" // @Router /v1/detection [post] diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index 5c87e6d05e19..56bff7272511 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -40,6 +40,7 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, backendGaller // GetOpStatusEndpoint returns the job status // @Summary Returns the job status +// @Tags models // @Success 200 {object} services.GalleryOpStatus "Response" // @Router /models/jobs/{uuid} [get] func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() echo.HandlerFunc { @@ -54,6 +55,7 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() echo.HandlerFunc { // GetAllStatusEndpoint returns all the jobs status progress // @Summary Returns all the jobs status progress +// @Tags models // @Success 200 {object} map[string]services.GalleryOpStatus "Response" // @Router /models/jobs [get] func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() echo.HandlerFunc { @@ -64,6 +66,7 @@ func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() echo.HandlerFunc // ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery // @Summary Install models to LocalAI. 
+// @Tags models // @Param request body GalleryModel true "query params" // @Success 200 {object} schema.GalleryResponse "Response" // @Router /models/apply [post] @@ -93,6 +96,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() echo.Handler // DeleteModelGalleryEndpoint lets delete models from a LocalAI instance // @Summary delete models to LocalAI. +// @Tags models // @Param name path string true "Model name" // @Success 200 {object} schema.GalleryResponse "Response" // @Router /models/delete/{name} [post] @@ -118,7 +122,8 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() echo.Handle // ListModelFromGalleryEndpoint list the available models for installation from the active galleries // @Summary List installable models. -// @Success 200 {object} []gallery.GalleryModel "Response" +// @Tags models +// @Success 200 {object} []gallery.Metadata "Response" // @Router /models/available [get] func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint(systemState *system.SystemState) echo.HandlerFunc { return func(c echo.Context) error { @@ -149,6 +154,7 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint(systemState // ListModelGalleriesEndpoint list the available galleries configured in LocalAI // @Summary List all Galleries +// @Tags models // @Success 200 {object} []config.Gallery "Response" // @Router /models/galleries [get] // NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents! diff --git a/core/http/endpoints/localai/get_token_metrics.go b/core/http/endpoints/localai/get_token_metrics.go index 69c408e50b76..36b0301b78f9 100644 --- a/core/http/endpoints/localai/get_token_metrics.go +++ b/core/http/endpoints/localai/get_token_metrics.go @@ -16,6 +16,7 @@ import ( // TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID // // @Summary Get TokenMetrics for Active Slot. 
+// @Tags tokenize // @Accept json // @Produce audio/x-wav // @Success 200 {string} binary "generated audio/wav file" diff --git a/core/http/endpoints/localai/mcp.go b/core/http/endpoints/localai/mcp.go index 0ff75f4a9392..ef879a737ecd 100644 --- a/core/http/endpoints/localai/mcp.go +++ b/core/http/endpoints/localai/mcp.go @@ -52,6 +52,7 @@ type MCPErrorEvent struct { // which handles MCP tool injection and server-side execution. // Both streaming and non-streaming modes use standard OpenAI response format. // @Summary MCP chat completions with automatic tool execution +// @Tags mcp // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/mcp/chat/completions [post] diff --git a/core/http/endpoints/localai/metrics.go b/core/http/endpoints/localai/metrics.go index a5f08a7f6444..9027e738921f 100644 --- a/core/http/endpoints/localai/metrics.go +++ b/core/http/endpoints/localai/metrics.go @@ -10,7 +10,9 @@ import ( // LocalAIMetricsEndpoint returns the metrics endpoint for LocalAI // @Summary Prometheus metrics endpoint -// @Param request body config.Gallery true "Gallery details" +// @Tags monitoring +// @Produce text/plain +// @Success 200 {string} string "Prometheus metrics" // @Router /metrics [get] func LocalAIMetricsEndpoint() echo.HandlerFunc { return echo.WrapHandler(promhttp.Handler()) diff --git a/core/http/endpoints/localai/p2p.go b/core/http/endpoints/localai/p2p.go index cc630be4f440..e168b6f9a196 100644 --- a/core/http/endpoints/localai/p2p.go +++ b/core/http/endpoints/localai/p2p.go @@ -9,6 +9,7 @@ import ( // ShowP2PNodes returns the P2P Nodes // @Summary Returns available P2P nodes +// @Tags p2p // @Success 200 {object} []schema.P2PNodesResponse "Response" // @Router /api/p2p [get] func ShowP2PNodes(appConfig *config.ApplicationConfig) echo.HandlerFunc { @@ -24,6 +25,7 @@ func ShowP2PNodes(appConfig *config.ApplicationConfig) echo.HandlerFunc { // ShowP2PToken returns the 
P2P token // @Summary Show the P2P token +// @Tags p2p // @Success 200 {string} string "Response" // @Router /api/p2p/token [get] func ShowP2PToken(appConfig *config.ApplicationConfig) echo.HandlerFunc { diff --git a/core/http/endpoints/localai/system.go b/core/http/endpoints/localai/system.go index a3831e18483a..3e5652117bc5 100644 --- a/core/http/endpoints/localai/system.go +++ b/core/http/endpoints/localai/system.go @@ -9,6 +9,7 @@ import ( // SystemInformations returns the system informations // @Summary Show the LocalAI instance information +// @Tags monitoring // @Success 200 {object} schema.SystemInformationResponse "Response" // @Router /system [get] func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { diff --git a/core/http/endpoints/localai/tokenize.go b/core/http/endpoints/localai/tokenize.go index 23eec48c7545..439b7a7e618a 100644 --- a/core/http/endpoints/localai/tokenize.go +++ b/core/http/endpoints/localai/tokenize.go @@ -11,6 +11,7 @@ import ( // TokenizeEndpoint exposes a REST API to tokenize the content // @Summary Tokenize the input. +// @Tags tokenize // @Param request body schema.TokenizeRequest true "Request" // @Success 200 {object} schema.TokenizeResponse "Response" // @Router /v1/tokenize [post] diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 49bba5528bb1..40e4881910f7 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -17,6 +17,7 @@ import ( // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech // // @Summary Generates audio from the input text. 
+// @Tags audio // @Accept json // @Produce audio/x-wav // @Param request body schema.TTSRequest true "query params" diff --git a/core/http/endpoints/localai/vad.go b/core/http/endpoints/localai/vad.go index 155574c85102..84521af2462c 100644 --- a/core/http/endpoints/localai/vad.go +++ b/core/http/endpoints/localai/vad.go @@ -12,6 +12,7 @@ import ( // VADEndpoint is Voice-Activation-Detection endpoint // @Summary Detect voice fragments in an audio stream +// @Tags audio // @Accept json // @Param request body schema.VADRequest true "query params" // @Success 200 {object} proto.VADResponse "Response" diff --git a/core/http/endpoints/localai/video.go b/core/http/endpoints/localai/video.go index da33a03734ac..8a65ae5fd9e0 100644 --- a/core/http/endpoints/localai/video.go +++ b/core/http/endpoints/localai/video.go @@ -62,6 +62,7 @@ func downloadFile(url string) (string, error) { */ // VideoEndpoint // @Summary Creates a video given a prompt. +// @Tags video // @Param request body schema.VideoRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /video [post] diff --git a/core/http/endpoints/localai/vram.go b/core/http/endpoints/localai/vram.go new file mode 100644 index 000000000000..e31c2d12f9e5 --- /dev/null +++ b/core/http/endpoints/localai/vram.go @@ -0,0 +1,154 @@ +package localai + +import ( + "context" + "fmt" + "net/http" + "path" + "path/filepath" + "strings" + "time" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/vram" +) + +type vramEstimateRequest struct { + Model string `json:"model"` // model name (must be installed) + ContextSize uint32 `json:"context_size,omitempty"` // context length to estimate for (default 8192) + GPULayers int `json:"gpu_layers,omitempty"` // number of layers to offload to GPU (0 = all) + KVQuantBits int `json:"kv_quant_bits,omitempty"` // KV cache quantization bits (0 = fp16) +} + +type vramEstimateResponse struct { + 
vram.EstimateResult + ContextNote string `json:"context_note,omitempty"` // note when context_size was defaulted + ModelMaxContext uint64 `json:"model_max_context,omitempty"` // model's trained maximum context length +} + +// resolveModelURI converts a relative model path to a file:// URI so the +// size resolver can stat it on disk. URIs that already have a scheme are +// returned unchanged. +func resolveModelURI(uri, modelsPath string) string { + if strings.Contains(uri, "://") { + return uri + } + return "file://" + filepath.Join(modelsPath, uri) +} + +// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an +// installed model configuration. For uninstalled models (gallery URLs), use +// the gallery-level estimates in /api/models instead. +// @Summary Estimate VRAM usage for a model +// @Description Estimates VRAM based on model weight files, context size, and GPU layers +// @Tags config +// @Accept json +// @Produce json +// @Param request body vramEstimateRequest true "VRAM estimation parameters" +// @Success 200 {object} vramEstimateResponse "VRAM estimate" +// @Router /api/models/vram-estimate [post] +func VRAMEstimateEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true} + + return func(c echo.Context) error { + var req vramEstimateRequest + if err := c.Bind(&req); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid request body"}) + } + + if req.Model == "" { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"}) + } + + modelConfig, exists := cl.GetModelConfig(req.Model) + if !exists { + return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"}) + } + + modelsPath := appConfig.SystemState.Model.ModelsPath + + // Build file inputs from the model's download files + var files []vram.FileInput + var 
firstGGUF string + for _, f := range modelConfig.DownloadFiles { + ext := strings.ToLower(path.Ext(path.Base(f.Filename))) + if weightExts[ext] { + resolved := resolveModelURI(string(f.URI), modelsPath) + files = append(files, vram.FileInput{URI: resolved, Size: 0}) + if firstGGUF == "" && ext == ".gguf" { + firstGGUF = resolved + } + } + } + + // Also include the main model file if it looks like a weight file + if modelConfig.Model != "" { + ext := strings.ToLower(path.Ext(path.Base(modelConfig.Model))) + if weightExts[ext] { + resolved := resolveModelURI(modelConfig.Model, modelsPath) + files = append(files, vram.FileInput{URI: resolved, Size: 0}) + if firstGGUF == "" && ext == ".gguf" { + firstGGUF = resolved + } + } + } + + // Include the vision projector (mmproj) file — it is loaded entirely + // into GPU VRAM alongside the main model. + if modelConfig.MMProj != "" { + ext := strings.ToLower(path.Ext(path.Base(modelConfig.MMProj))) + if weightExts[ext] { + resolved := resolveModelURI(modelConfig.MMProj, modelsPath) + files = append(files, vram.FileInput{URI: resolved, Size: 0}) + } + } + + if len(files) == 0 { + return c.JSON(http.StatusOK, map[string]any{ + "message": "no weight files found for estimation", + }) + } + + contextDefaulted := false + opts := vram.EstimateOptions{ + ContextLength: req.ContextSize, + GPULayers: req.GPULayers, + KVQuantBits: req.KVQuantBits, + } + if opts.ContextLength == 0 { + if modelConfig.ContextSize != nil { + opts.ContextLength = uint32(*modelConfig.ContextSize) + } else { + opts.ContextLength = 8192 + contextDefaulted = true + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + result, err := vram.Estimate(ctx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader()) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()}) + } + + resp := vramEstimateResponse{EstimateResult: result} + + // When context was 
defaulted to 8192, read the GGUF metadata to report + // the model's trained maximum context length so callers know the estimate + // may be conservative. + if contextDefaulted && firstGGUF != "" { + ggufMeta, err := vram.DefaultCachedGGUFReader().ReadMetadata(ctx, firstGGUF) + if err == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 { + resp.ModelMaxContext = ggufMeta.MaximumContextLength + resp.ContextNote = fmt.Sprintf( + "Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.", + ggufMeta.MaximumContextLength, + ) + } + } + + return c.JSON(http.StatusOK, resp) + } +} diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 871084054284..da84f58b6209 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -56,6 +56,7 @@ func mergeToolCallDeltas(existing []schema.ToolCall, deltas []schema.ToolCall) [ // ChatEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create // @Summary Generate a chat completions for a given prompt and model. +// @Tags inference // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 7b094cb3bf22..069bc33a60f5 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -21,6 +21,7 @@ import ( // CompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions // @Summary Generate completions for a given prompt and model. 
+// @Tags inference // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/completions [post] diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index 917a05a2455e..c74e2d1f7546 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -20,6 +20,7 @@ import ( // EditEndpoint is the OpenAI edit API endpoint // @Summary OpenAI edit endpoint +// @Tags inference // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/edits [post] diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go index b88f3eb03795..04ffbf1facec 100644 --- a/core/http/endpoints/openai/embeddings.go +++ b/core/http/endpoints/openai/embeddings.go @@ -18,6 +18,7 @@ import ( // EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings // @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. +// @Tags embeddings // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/embeddings [post] diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 32834a923331..19e530d7c52e 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -68,6 +68,7 @@ func downloadFile(url string) (string, error) { */ // ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create // @Summary Creates an image given a prompt. 
+// @Tags images // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/images/generations [post] diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index 1f722bacf90e..ff0b08225392 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -12,6 +12,7 @@ import ( // ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models // @Summary List and describe the various models available in the API. +// @Tags models // @Success 200 {object} schema.ModelsDataResponse "Response" // @Router /v1/models [get] func ListModelsEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, db ...*gorm.DB) echo.HandlerFunc { diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go index c52fe1914c2d..ffbfb9faac4d 100644 --- a/core/http/endpoints/openai/transcription.go +++ b/core/http/endpoints/openai/transcription.go @@ -21,6 +21,7 @@ import ( // TranscriptEndpoint is the OpenAI Whisper API endpoint https://platform.openai.com/docs/api-reference/audio/create // @Summary Transcribes audio into the input language. 
+// @Tags audio // @accept multipart/form-data // @Param model formData string true "model" // @Param file formData file true "file" diff --git a/core/http/endpoints/openresponses/responses.go b/core/http/endpoints/openresponses/responses.go index 37c59b5687c4..dc2e1301560d 100644 --- a/core/http/endpoints/openresponses/responses.go +++ b/core/http/endpoints/openresponses/responses.go @@ -26,6 +26,7 @@ import ( // ResponsesEndpoint is the Open Responses API endpoint // https://www.openresponses.org/specification // @Summary Create a response using the Open Responses API +// @Tags inference // @Param request body schema.OpenResponsesRequest true "Request body" // @Success 200 {object} schema.ORResponseResource "Response" // @Router /v1/responses [post] @@ -3000,6 +3001,7 @@ func convertORToolsToOpenAIFormat(orTools []schema.ORFunctionTool) []functions.T // GetResponseEndpoint returns a handler for GET /responses/:id // This endpoint is used for polling background responses or resuming streaming // @Summary Get a response by ID +// @Tags inference // @Description Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses. 
// @Param id path string true "Response ID" // @Param stream query string false "Set to 'true' to resume streaming" @@ -3141,6 +3143,7 @@ func handleStreamResume(c echo.Context, store *ResponseStore, responseID string, // CancelResponseEndpoint returns a handler for POST /responses/:id/cancel // This endpoint cancels a background response if it's still in progress // @Summary Cancel a response +// @Tags inference // @Description Cancel a background response if it's still in progress // @Param id path string true "Response ID" // @Success 200 {object} schema.ORResponseResource "Response" diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index c91986ab6439..5972fb6153ca 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -132,6 +132,113 @@ func RegisterLocalAIRoutes(router *echo.Echo, }{Version: internal.PrintableVersion()}) }) + // Agent discovery endpoint + router.GET("/.well-known/localai.json", func(c echo.Context) error { + return c.JSON(200, map[string]any{ + "version": internal.PrintableVersion(), + // Flat endpoint list for backwards compatibility + "endpoints": map[string]any{ + "models": "/v1/models", + "chat_completions": "/v1/chat/completions", + "completions": "/v1/completions", + "embeddings": "/v1/embeddings", + "config_metadata": "/api/models/config-metadata", + "config_json": "/api/models/config-json/:name", + "config_patch": "/api/models/config-json/:name", + "autocomplete": "/api/models/config-metadata/autocomplete/:provider", + "vram_estimate": "/api/models/vram-estimate", + "tts": "/tts", + "transcription": "/v1/audio/transcriptions", + "image_generation": "/v1/images/generations", + "swagger": "/swagger/index.html", + "skills": "/api/skills", + }, + // Categorized endpoint groups for structured discovery + "endpoint_groups": map[string]any{ + "openai_compatible": map[string]string{ + "models": "/v1/models", + "chat_completions": "/v1/chat/completions", + "completions": "/v1/completions", + 
"embeddings": "/v1/embeddings", + "transcription": "/v1/audio/transcriptions", + "image_generation": "/v1/images/generations", + }, + "config_management": map[string]string{ + "config_metadata": "/api/models/config-metadata", + "config_json": "/api/models/config-json/:name", + "config_patch": "/api/models/config-json/:name", + "autocomplete": "/api/models/config-metadata/autocomplete/:provider", + "vram_estimate": "/api/models/vram-estimate", + }, + "model_management": map[string]string{ + "list_gallery": "/models/available", + "install": "/models/apply", + "delete": "/models/delete/:name", + "edit": "/models/edit/:name", + "import": "/models/import", + "reload": "/models/reload", + }, + "ai_functions": map[string]string{ + "tts": "/tts", + "vad": "/vad", + "video": "/video", + "detection": "/v1/detection", + "tokenize": "/v1/tokenize", + }, + "monitoring": map[string]string{ + "metrics": "/metrics", + "backend_monitor": "/backend/monitor", + "backend_shutdown": "/backend/shutdown", + "system": "/system", + "version": "/version", + "traces": "/api/traces", + }, + "mcp": map[string]string{ + "chat_completions": "/v1/mcp/chat/completions", + "servers": "/v1/mcp/servers/:model", + "prompts": "/v1/mcp/prompts/:model", + "resources": "/v1/mcp/resources/:model", + }, + "p2p": map[string]string{ + "nodes": "/api/p2p", + "token": "/api/p2p/token", + }, + "agents": map[string]string{ + "tasks": "/api/agent/tasks", + "jobs": "/api/agent/jobs", + "execute": "/api/agent/jobs/execute", + }, + "settings": map[string]string{ + "get": "/api/settings", + "update": "/api/settings", + }, + "stores": map[string]string{ + "set": "/stores/set", + "get": "/stores/get", + "find": "/stores/find", + "delete": "/stores/delete", + }, + "docs": map[string]string{ + "swagger": "/swagger/index.html", + "skills": "/api/skills", + }, + }, + "capabilities": map[string]bool{ + "config_metadata": true, + "config_patch": true, + "vram_estimate": true, + "mcp": !appConfig.DisableMCP, + "agents": 
appConfig.AgentPool.Enabled, + "p2p": appConfig.P2PToken != "", + "tracing": true, + }, + }) + }) + + // API skills for agent discovery (no auth — agents should discover these without credentials) + router.GET("/api/skills", localai.ListAPISkillsEndpoint()) + router.GET("/api/skills/:name", localai.GetAPISkillEndpoint()) + router.GET("/api/features", func(c echo.Context) error { return c.JSON(200, map[string]bool{ "agents": appConfig.AgentPool.Enabled, diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 81d9b4275ef0..ad539c7ee1f0 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -701,6 +701,18 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model return c.JSON(http.StatusOK, modelConfig) }, adminMiddleware) + // Config metadata API - returns field metadata for all ~170 config fields + app.GET("/api/models/config-metadata", localai.ConfigMetadataEndpoint(), adminMiddleware) + + // Autocomplete providers for config fields (dynamic values only) + app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware) + + // PATCH config endpoint - partial update using nested JSON merge + app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware) + + // VRAM estimation endpoint + app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware) + // Get installed model YAML config for the React model editor app.GET("/api/models/edit/:name", func(c echo.Context) error { modelName := c.Param("name") @@ -1307,3 +1319,4 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model }) }, adminMiddleware) } + diff --git a/core/schema/agent_jobs.go b/core/schema/agent_jobs.go index ac5d13fc6c9f..51f28e50ffab 100644 --- a/core/schema/agent_jobs.go +++ b/core/schema/agent_jobs.go @@ -126,9 +126,9 @@ type JobExecutionRequest struct { // 
JobExecutionResponse represents the response after creating a job type JobExecutionResponse struct { - JobID string `json:"job_id"` - Status string `json:"status"` - URL string `json:"url"` // URL to check job status + JobID string `json:"job_id"` // unique job identifier + Status string `json:"status"` // initial status (pending) + URL string `json:"url"` // URL to poll for job status } // TasksFile represents the structure of agent_tasks.json diff --git a/core/schema/localai.go b/core/schema/localai.go index 6f98bf320eee..0f130f95c371 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -33,21 +33,21 @@ type GalleryResponse struct { type VideoRequest struct { BasicModelRequest - Prompt string `json:"prompt" yaml:"prompt"` - NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"` - StartImage string `json:"start_image" yaml:"start_image"` - EndImage string `json:"end_image" yaml:"end_image"` - Width int32 `json:"width" yaml:"width"` - Height int32 `json:"height" yaml:"height"` - NumFrames int32 `json:"num_frames" yaml:"num_frames"` - FPS int32 `json:"fps" yaml:"fps"` - Seconds string `json:"seconds,omitempty" yaml:"seconds,omitempty"` - Size string `json:"size,omitempty" yaml:"size,omitempty"` - InputReference string `json:"input_reference,omitempty" yaml:"input_reference,omitempty"` - Seed int32 `json:"seed" yaml:"seed"` - CFGScale float32 `json:"cfg_scale" yaml:"cfg_scale"` - Step int32 `json:"step" yaml:"step"` - ResponseFormat string `json:"response_format" yaml:"response_format"` + Prompt string `json:"prompt" yaml:"prompt"` // text description of the video to generate + NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"` // things to avoid in the output + StartImage string `json:"start_image" yaml:"start_image"` // URL or base64 of the first frame + EndImage string `json:"end_image" yaml:"end_image"` // URL or base64 of the last frame + Width int32 `json:"width" yaml:"width"` // output width in pixels + Height 
int32 `json:"height" yaml:"height"` // output height in pixels + NumFrames int32 `json:"num_frames" yaml:"num_frames"` // total number of frames to generate + FPS int32 `json:"fps" yaml:"fps"` // frames per second + Seconds string `json:"seconds,omitempty" yaml:"seconds,omitempty"` // duration in seconds (alternative to num_frames) + Size string `json:"size,omitempty" yaml:"size,omitempty"` // WxH shorthand (e.g. "512x512") + InputReference string `json:"input_reference,omitempty" yaml:"input_reference,omitempty"` // reference image or video URL + Seed int32 `json:"seed" yaml:"seed"` // random seed for reproducibility + CFGScale float32 `json:"cfg_scale" yaml:"cfg_scale"` // classifier-free guidance scale + Step int32 `json:"step" yaml:"step"` // number of diffusion steps + ResponseFormat string `json:"response_format" yaml:"response_format"` // output format (url or b64_json) } // @Description TTS request body @@ -55,7 +55,7 @@ type TTSRequest struct { BasicModelRequest Input string `json:"input" yaml:"input"` // text input Voice string `json:"voice" yaml:"voice"` // voice audio file or speaker id - Backend string `json:"backend" yaml:"backend"` + Backend string `json:"backend" yaml:"backend"` // backend engine override Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model Format string `json:"response_format,omitempty" yaml:"response_format,omitempty"` // (optional) output format Stream bool `json:"stream,omitempty" yaml:"stream,omitempty"` // (optional) enable streaming TTS @@ -65,7 +65,7 @@ type TTSRequest struct { // @Description VAD request body type VADRequest struct { BasicModelRequest - Audio []float32 `json:"audio" yaml:"audio"` // model name or full path + Audio []float32 `json:"audio" yaml:"audio"` // raw audio samples as float32 PCM } type VADSegment struct { @@ -146,13 +146,13 @@ type SysInfoModel struct { } type SystemInformationResponse struct { - Backends []string `json:"backends"` - 
Models []SysInfoModel `json:"loaded_models"` + Backends []string `json:"backends"` // available backend engines + Models []SysInfoModel `json:"loaded_models"` // currently loaded models } type DetectionRequest struct { BasicModelRequest - Image string `json:"image"` + Image string `json:"image"` // URL or base64-encoded image to analyze } type DetectionResponse struct { diff --git a/core/schema/tokenize.go b/core/schema/tokenize.go index e481f186333f..5129b6ab7a62 100644 --- a/core/schema/tokenize.go +++ b/core/schema/tokenize.go @@ -2,9 +2,9 @@ package schema type TokenizeRequest struct { BasicModelRequest - Content string `json:"content"` + Content string `json:"content"` // text to tokenize } type TokenizeResponse struct { - Tokens []int32 `json:"tokens"` + Tokens []int32 `json:"tokens"` // token IDs } diff --git a/pkg/vram/gguf_reader.go b/pkg/vram/gguf_reader.go index 631c017f7418..3f731b482d3b 100644 --- a/pkg/vram/gguf_reader.go +++ b/pkg/vram/gguf_reader.go @@ -34,10 +34,11 @@ func (defaultGGUFReader) ReadMetadata(ctx context.Context, uri string) (*GGUFMet func ggufFileToMeta(f *gguf.GGUFFile) *GGUFMeta { arch := f.Architecture() meta := &GGUFMeta{ - BlockCount: uint32(arch.BlockCount), - EmbeddingLength: uint32(arch.EmbeddingLength), - HeadCount: uint32(arch.AttentionHeadCount), - HeadCountKV: uint32(arch.AttentionHeadCountKV), + BlockCount: uint32(arch.BlockCount), + EmbeddingLength: uint32(arch.EmbeddingLength), + HeadCount: uint32(arch.AttentionHeadCount), + HeadCountKV: uint32(arch.AttentionHeadCountKV), + MaximumContextLength: arch.MaximumContextLength, } if meta.HeadCountKV == 0 { meta.HeadCountKV = meta.HeadCount diff --git a/pkg/vram/types.go b/pkg/vram/types.go index cda76aff6378..35180185de32 100644 --- a/pkg/vram/types.go +++ b/pkg/vram/types.go @@ -15,10 +15,11 @@ type SizeResolver interface { // GGUFMeta holds parsed GGUF metadata used for VRAM estimation. 
type GGUFMeta struct { - BlockCount uint32 - EmbeddingLength uint32 - HeadCount uint32 - HeadCountKV uint32 + BlockCount uint32 + EmbeddingLength uint32 + HeadCount uint32 + HeadCountKV uint32 + MaximumContextLength uint64 } // GGUFMetadataReader reads GGUF metadata from a URI (e.g. via HTTP Range). @@ -35,8 +36,8 @@ type EstimateOptions struct { // EstimateResult holds estimated download size and VRAM with display strings. type EstimateResult struct { - SizeBytes uint64 - SizeDisplay string - VRAMBytes uint64 - VRAMDisplay string + SizeBytes uint64 `json:"sizeBytes"` // total model weight size in bytes + SizeDisplay string `json:"sizeDisplay"` // human-readable size (e.g. "4.2 GB") + VRAMBytes uint64 `json:"vrambytes"` // estimated VRAM usage in bytes + VRAMDisplay string `json:"vramdisplay"` // human-readable VRAM (e.g. "6.1 GB") } diff --git a/swagger/docs.go b/swagger/docs.go index 52c3be58def2..74521ff0bb26 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -22,8 +22,609 @@ const docTemplate = `{ "host": "{{.Host}}", "basePath": "{{.BasePath}}", "paths": { + "/api/agent/jobs": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "List agent jobs", + "parameters": [ + { + "type": "string", + "description": "Filter by task ID", + "name": "task_id", + "in": "query" + }, + { + "type": "string", + "description": "Filter by status (pending, running, completed, failed, cancelled)", + "name": "status", + "in": "query" + }, + { + "type": "integer", + "description": "Max number of jobs to return", + "name": "limit", + "in": "query" + }, + { + "type": "string", + "description": "Set to 'true' for admin cross-user listing", + "name": "all_users", + "in": "query" + } + ], + "responses": { + "200": { + "description": "jobs", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Job" + } + } + } + } + } + }, + "/api/agent/jobs/execute": { + "post": { + "consumes": [ + "application/json" + ], + 
"produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Execute an agent job", + "parameters": [ + { + "description": "Job execution request", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.JobExecutionRequest" + } + } + ], + "responses": { + "201": { + "description": "job created", + "schema": { + "$ref": "#/definitions/schema.JobExecutionResponse" + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/jobs/{id}": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Get an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "job", + "schema": { + "$ref": "#/definitions/schema.Job" + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "delete": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Delete an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/jobs/{id}/cancel": { + "post": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Cancel an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": 
"message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "List agent tasks", + "parameters": [ + { + "type": "string", + "description": "Set to 'true' for admin cross-user listing", + "name": "all_users", + "in": "query" + } + ], + "responses": { + "200": { + "description": "tasks", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Task" + } + } + } + } + }, + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Create a new agent task", + "parameters": [ + { + "description": "Task definition", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.Task" + } + } + ], + "responses": { + "201": { + "description": "id", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks/{id}": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Get an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "task", + "schema": { + "$ref": "#/definitions/schema.Task" + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, 
+ "put": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Update an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + }, + { + "description": "Updated task definition", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.Task" + } + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "delete": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Delete an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks/{name}/execute": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Execute an agent task by name", + "parameters": [ + { + "type": "string", + "description": "Task name", + "name": "name", + "in": "path", + "required": true + }, + { + "description": "Optional template parameters", + "name": "parameters", + "in": "body", + "schema": { + "type": "object" + } + } + ], + "responses": { + "201": { + "description": "job created", + "schema": { + "$ref": 
"#/definitions/schema.JobExecutionResponse" + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/models/config-json/{name}": { + "patch": { + "description": "Deep-merges the JSON patch body into the existing model config", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Partially update a model configuration", + "parameters": [ + { + "type": "string", + "description": "Model name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "success message", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + } + }, + "/api/models/config-metadata": { + "get": { + "description": "Returns ~170 config fields with types, UI hints, sections, and options", + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "List all model configuration field metadata", + "responses": { + "200": { + "description": "List of field metadata", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/meta.FieldMeta" + } + } + } + } + } + }, + "/api/models/config-metadata/autocomplete/{provider}": { + "get": { + "description": "Returns runtime-resolved values for dynamic providers (backends, models)", + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Get dynamic autocomplete values for a config field", + "parameters": [ + { + "type": "string", + "description": "Provider name (backends, models, models:chat, models:tts, models:transcript, models:vad)", + "name": "provider", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "values array", + "schema": { + "type": "object", + 
"additionalProperties": true + } + } + } + } + }, + "/api/models/vram-estimate": { + "post": { + "description": "Estimates VRAM based on model weight files, context size, and GPU layers", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Estimate VRAM usage for a model", + "parameters": [ + { + "description": "VRAM estimation parameters", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/localai.vramEstimateRequest" + } + } + ], + "responses": { + "200": { + "description": "VRAM estimate", + "schema": { + "$ref": "#/definitions/localai.vramEstimateResponse" + } + } + } + } + }, "/api/p2p": { "get": { + "tags": [ + "p2p" + ], "summary": "Returns available P2P nodes", "responses": { "200": { @@ -40,6 +641,9 @@ const docTemplate = `{ }, "/api/p2p/token": { "get": { + "tags": [ + "p2p" + ], "summary": "Show the P2P token", "responses": { "200": { @@ -53,6 +657,9 @@ const docTemplate = `{ }, "/backend/monitor": { "get": { + "tags": [ + "monitoring" + ], "summary": "Backend monitor endpoint", "parameters": [ { @@ -77,7 +684,10 @@ const docTemplate = `{ }, "/backend/shutdown": { "post": { - "summary": "Backend monitor endpoint", + "tags": [ + "monitoring" + ], + "summary": "Backend shutdown endpoint", "parameters": [ { "description": "Backend statistics request", @@ -94,6 +704,9 @@ const docTemplate = `{ }, "/backends": { "get": { + "tags": [ + "backends" + ], "summary": "List all Backends", "responses": { "200": { @@ -110,6 +723,9 @@ const docTemplate = `{ }, "/backends/apply": { "post": { + "tags": [ + "backends" + ], "summary": "Install backends to LocalAI.", "parameters": [ { @@ -134,6 +750,9 @@ const docTemplate = `{ }, "/backends/available": { "get": { + "tags": [ + "backends" + ], "summary": "List all available Backends", "responses": { "200": { @@ -150,6 +769,9 @@ const docTemplate = `{ }, "/backends/delete/{name}": { "post": { + "tags": [ + 
"backends" + ], "summary": "delete backends from LocalAI.", "parameters": [ { @@ -172,6 +794,9 @@ const docTemplate = `{ }, "/backends/galleries": { "get": { + "tags": [ + "backends" + ], "summary": "List all Galleries", "responses": { "200": { @@ -188,6 +813,9 @@ const docTemplate = `{ }, "/backends/jobs": { "get": { + "tags": [ + "backends" + ], "summary": "Returns all the jobs status progress", "responses": { "200": { @@ -204,6 +832,9 @@ const docTemplate = `{ }, "/backends/jobs/{uuid}": { "get": { + "tags": [ + "backends" + ], "summary": "Returns the job status", "responses": { "200": { @@ -217,23 +848,28 @@ const docTemplate = `{ }, "/metrics": { "get": { + "produces": [ + "text/plain" + ], + "tags": [ + "monitoring" + ], "summary": "Prometheus metrics endpoint", - "parameters": [ - { - "description": "Gallery details", - "name": "request", - "in": "body", - "required": true, + "responses": { + "200": { + "description": "Prometheus metrics", "schema": { - "$ref": "#/definitions/config.Gallery" + "type": "string" } } - ], - "responses": {} + } } }, "/models/apply": { "post": { + "tags": [ + "models" + ], "summary": "Install models to LocalAI.", "parameters": [ { @@ -258,6 +894,9 @@ const docTemplate = `{ }, "/models/available": { "get": { + "tags": [ + "models" + ], "summary": "List installable models.", "responses": { "200": { @@ -265,7 +904,7 @@ const docTemplate = `{ "schema": { "type": "array", "items": { - "$ref": "#/definitions/gallery.GalleryModel" + "$ref": "#/definitions/gallery.Metadata" } } } @@ -274,6 +913,9 @@ const docTemplate = `{ }, "/models/delete/{name}": { "post": { + "tags": [ + "models" + ], "summary": "delete models to LocalAI.", "parameters": [ { @@ -296,6 +938,9 @@ const docTemplate = `{ }, "/models/galleries": { "get": { + "tags": [ + "models" + ], "summary": "List all Galleries", "responses": { "200": { @@ -312,6 +957,9 @@ const docTemplate = `{ }, "/models/jobs": { "get": { + "tags": [ + "models" + ], "summary": "Returns all the jobs 
status progress", "responses": { "200": { @@ -328,6 +976,9 @@ const docTemplate = `{ }, "/models/jobs/{uuid}": { "get": { + "tags": [ + "models" + ], "summary": "Returns the job status", "responses": { "200": { @@ -341,6 +992,9 @@ const docTemplate = `{ }, "/system": { "get": { + "tags": [ + "monitoring" + ], "summary": "Show the LocalAI instance information", "responses": { "200": { @@ -360,6 +1014,9 @@ const docTemplate = `{ "produces": [ "audio/x-wav" ], + "tags": [ + "tokenize" + ], "summary": "Get TokenMetrics for Active Slot.", "responses": { "200": { @@ -379,6 +1036,9 @@ const docTemplate = `{ "produces": [ "audio/x-wav" ], + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -409,6 +1069,9 @@ const docTemplate = `{ "produces": [ "audio/x-wav" ], + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -436,6 +1099,9 @@ const docTemplate = `{ "consumes": [ "multipart/form-data" ], + "tags": [ + "audio" + ], "summary": "Transcribes audio into the input language.", "parameters": [ { @@ -468,6 +1134,9 @@ const docTemplate = `{ }, "/v1/chat/completions": { "post": { + "tags": [ + "inference" + ], "summary": "Generate a chat completions for a given prompt and model.", "parameters": [ { @@ -492,6 +1161,9 @@ const docTemplate = `{ }, "/v1/completions": { "post": { + "tags": [ + "inference" + ], "summary": "Generate completions for a given prompt and model.", "parameters": [ { @@ -516,6 +1188,9 @@ const docTemplate = `{ }, "/v1/detection": { "post": { + "tags": [ + "detection" + ], "summary": "Detects objects in the input image.", "parameters": [ { @@ -540,6 +1215,9 @@ const docTemplate = `{ }, "/v1/edits": { "post": { + "tags": [ + "inference" + ], "summary": "OpenAI edit endpoint", "parameters": [ { @@ -564,6 +1242,9 @@ const docTemplate = `{ }, "/v1/embeddings": { "post": { + "tags": [ + "embeddings" + ], "summary": "Get a vector representation of a given input that can be 
easily consumed by machine learning models and algorithms.", "parameters": [ { @@ -588,6 +1269,9 @@ const docTemplate = `{ }, "/v1/images/generations": { "post": { + "tags": [ + "images" + ], "summary": "Creates an image given a prompt.", "parameters": [ { @@ -689,6 +1373,9 @@ const docTemplate = `{ }, "/v1/mcp/chat/completions": { "post": { + "tags": [ + "mcp" + ], "summary": "MCP chat completions with automatic tool execution", "parameters": [ { @@ -713,6 +1400,9 @@ const docTemplate = `{ }, "/v1/messages": { "post": { + "tags": [ + "inference" + ], "summary": "Generate a message response for the given messages and model.", "parameters": [ { @@ -737,6 +1427,9 @@ const docTemplate = `{ }, "/v1/models": { "get": { + "tags": [ + "models" + ], "summary": "List and describe the various models available in the API.", "responses": { "200": { @@ -750,6 +1443,9 @@ const docTemplate = `{ }, "/v1/rerank": { "post": { + "tags": [ + "rerank" + ], "summary": "Reranks a list of phrases by relevance to a given text query.", "parameters": [ { @@ -774,6 +1470,9 @@ const docTemplate = `{ }, "/v1/responses": { "post": { + "tags": [ + "inference" + ], "summary": "Create a response using the Open Responses API", "parameters": [ { @@ -799,6 +1498,9 @@ const docTemplate = `{ "/v1/responses/{id}": { "get": { "description": "Retrieve a response by ID. 
Can be used for polling background responses or resuming streaming responses.", + "tags": [ + "inference" + ], "summary": "Get a response by ID", "parameters": [ { @@ -848,6 +1550,9 @@ const docTemplate = `{ "/v1/responses/{id}/cancel": { "post": { "description": "Cancel a background response if it's still in progress", + "tags": [ + "inference" + ], "summary": "Cancel a response", "parameters": [ { @@ -884,6 +1589,9 @@ const docTemplate = `{ }, "/v1/sound-generation": { "post": { + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -908,6 +1616,9 @@ const docTemplate = `{ }, "/v1/text-to-speech/{voice-id}": { "post": { + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -945,6 +1656,9 @@ const docTemplate = `{ "produces": [ "audio/x-wav" ], + "tags": [ + "tokenize" + ], "summary": "Get TokenMetrics for Active Slot.", "responses": { "200": { @@ -958,6 +1672,9 @@ const docTemplate = `{ }, "/v1/tokenize": { "post": { + "tags": [ + "tokenize" + ], "summary": "Tokenize the input.", "parameters": [ { @@ -985,6 +1702,9 @@ const docTemplate = `{ "consumes": [ "application/json" ], + "tags": [ + "audio" + ], "summary": "Detect voice fragments in an audio stream", "parameters": [ { @@ -1009,6 +1729,9 @@ const docTemplate = `{ }, "/video": { "post": { + "tags": [ + "video" + ], "summary": "Creates a video given a prompt.", "parameters": [ { @@ -1197,7 +1920,74 @@ const docTemplate = `{ } } }, - "gallery.GalleryModel": { + "gallery.Metadata": { + "type": "object", + "properties": { + "backend": { + "description": "Backend is the resolved backend engine for this model (e.g. 
\"llama-cpp\").\nPopulated at load time from overrides, inline config, or the URL-referenced config file.", + "type": "string" + }, + "description": { + "type": "string" + }, + "files": { + "description": "AdditionalFiles are used to add additional files to the model", + "type": "array", + "items": { + "$ref": "#/definitions/gallery.File" + } + }, + "gallery": { + "description": "Gallery is a reference to the gallery which contains the model", + "allOf": [ + { + "$ref": "#/definitions/config.Gallery" + } + ] + }, + "icon": { + "type": "string" + }, + "installed": { + "description": "Installed is used to indicate if the model is installed or not", + "type": "boolean" + }, + "license": { + "type": "string" + }, + "name": { + "type": "string" + }, + "size": { + "description": "Size is an optional hardcoded model size string (e.g. \"500MB\", \"14.5GB\").\nUsed when the size cannot be estimated automatically.", + "type": "string" + }, + "tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "url": { + "type": "string" + }, + "urls": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "localai.GalleryBackend": { + "type": "object", + "properties": { + "id": { + "type": "string" + } + } + }, + "localai.GalleryModel": { "type": "object", "properties": { "backend": { @@ -1230,6 +2020,9 @@ const docTemplate = `{ "icon": { "type": "string" }, + "id": { + "type": "string" + }, "installed": { "description": "Installed is used to indicate if the model is installed or not", "type": "boolean" @@ -1266,83 +2059,137 @@ const docTemplate = `{ } } }, - "localai.GalleryBackend": { + "localai.vramEstimateRequest": { "type": "object", "properties": { - "id": { + "context_size": { + "description": "context length to estimate for (default 8192)", + "type": "integer" + }, + "gpu_layers": { + "description": "number of layers to offload to GPU (0 = all)", + "type": "integer" + }, + "kv_quant_bits": { + "description": "KV cache quantization bits (0 = 
fp16)", + "type": "integer" + }, + "model": { + "description": "model name (must be installed)", "type": "string" } } }, - "localai.GalleryModel": { + "localai.vramEstimateResponse": { "type": "object", "properties": { - "backend": { - "description": "Backend is the resolved backend engine for this model (e.g. \"llama-cpp\").\nPopulated at load time from overrides, inline config, or the URL-referenced config file.", + "context_note": { + "description": "note when context_size was defaulted", "type": "string" }, - "config_file": { - "description": "config_file is read in the situation where URL is blank - and therefore this is a base config.", - "type": "object", - "additionalProperties": true + "model_max_context": { + "description": "model's trained maximum context length", + "type": "integer" + }, + "sizeBytes": { + "description": "total model weight size in bytes", + "type": "integer" + }, + "sizeDisplay": { + "description": "human-readable size (e.g. \"4.2 GB\")", + "type": "string" + }, + "vrambytes": { + "description": "estimated VRAM usage in bytes", + "type": "integer" + }, + "vramdisplay": { + "description": "human-readable VRAM (e.g. 
\"6.1 GB\")", + "type": "string" + } + } + }, + "meta.FieldMeta": { + "type": "object", + "properties": { + "advanced": { + "type": "boolean" + }, + "autocomplete_provider": { + "description": "\"backends\", \"models:chat\", etc.", + "type": "string" }, + "component": { + "description": "\"input\", \"number\", \"toggle\", \"select\", \"slider\", etc.", + "type": "string" + }, + "default": {}, "description": { + "description": "help text", + "type": "string" + }, + "go_type": { + "description": "\"*int\", \"string\", \"[]string\"", "type": "string" }, - "files": { - "description": "AdditionalFiles are used to add additional files to the model", + "label": { + "description": "human-readable label", + "type": "string" + }, + "max": { + "type": "number" + }, + "min": { + "type": "number" + }, + "options": { "type": "array", "items": { - "$ref": "#/definitions/gallery.File" + "$ref": "#/definitions/meta.FieldOption" } }, - "gallery": { - "description": "Gallery is a reference to the gallery which contains the model", - "allOf": [ - { - "$ref": "#/definitions/config.Gallery" - } - ] + "order": { + "type": "integer" }, - "icon": { + "path": { + "description": "dot-path: \"context_size\", \"function.grammar.parallel_calls\"", "type": "string" }, - "id": { + "placeholder": { "type": "string" }, - "installed": { - "description": "Installed is used to indicate if the model is installed or not", + "pointer": { + "description": "true = nil means \"not set\"", "type": "boolean" }, - "license": { - "type": "string" - }, - "name": { + "section": { + "description": "\"general\", \"llm\", \"templates\", etc.", "type": "string" }, - "overrides": { - "description": "Overrides are used to override the configuration of the model located at URL", - "type": "object", - "additionalProperties": true + "step": { + "type": "number" }, - "size": { - "description": "Size is an optional hardcoded model size string (e.g. 
\"500MB\", \"14.5GB\").\nUsed when the size cannot be estimated automatically.", + "ui_type": { + "description": "\"string\", \"int\", \"float\", \"bool\", \"[]string\", \"map\", \"object\"", "type": "string" }, - "tags": { - "type": "array", - "items": { - "type": "string" - } + "vram_impact": { + "type": "boolean" }, - "url": { + "yaml_key": { + "description": "leaf yaml key", + "type": "string" + } + } + }, + "meta.FieldOption": { + "type": "object", + "properties": { + "label": { "type": "string" }, - "urls": { - "type": "array", - "items": { - "type": "string" - } + "value": { + "type": "string" } } }, @@ -1639,6 +2486,7 @@ const docTemplate = `{ "type": "object", "properties": { "image": { + "description": "URL or base64-encoded image to analyze", "type": "string" }, "model": { @@ -1848,6 +2696,205 @@ const docTemplate = `{ } } }, + "schema.Job": { + "type": "object", + "properties": { + "audios": { + "description": "List of audio URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "completed_at": { + "type": "string" + }, + "created_at": { + "type": "string" + }, + "error": { + "description": "Error message if failed", + "type": "string" + }, + "files": { + "description": "List of file URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "id": { + "description": "UUID", + "type": "string" + }, + "images": { + "description": "Multimedia content (for manual execution)\nCan contain URLs or base64-encoded data URIs", + "type": "array", + "items": { + "type": "string" + } + }, + "parameters": { + "description": "Template parameters", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "result": { + "description": "Agent response", + "type": "string" + }, + "started_at": { + "type": "string" + }, + "status": { + "description": "pending, running, completed, failed, cancelled", + "allOf": [ + { + "$ref": "#/definitions/schema.JobStatus" + } + ] + }, + "task_id": { + 
"description": "Reference to Task", + "type": "string" + }, + "traces": { + "description": "Execution traces (reasoning, tool calls, tool results)", + "type": "array", + "items": { + "$ref": "#/definitions/schema.JobTrace" + } + }, + "triggered_by": { + "description": "\"manual\", \"cron\", \"api\"", + "type": "string" + }, + "videos": { + "description": "List of video URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "webhook_error": { + "description": "Error if webhook failed", + "type": "string" + }, + "webhook_sent": { + "description": "Webhook delivery tracking", + "type": "boolean" + }, + "webhook_sent_at": { + "type": "string" + } + } + }, + "schema.JobExecutionRequest": { + "type": "object", + "properties": { + "audios": { + "description": "List of audio URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "files": { + "description": "List of file URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "images": { + "description": "Multimedia content (optional, for manual execution)\nCan contain URLs or base64-encoded data URIs", + "type": "array", + "items": { + "type": "string" + } + }, + "parameters": { + "description": "Optional, for templating", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "task_id": { + "description": "Required", + "type": "string" + }, + "videos": { + "description": "List of video URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "schema.JobExecutionResponse": { + "type": "object", + "properties": { + "job_id": { + "description": "unique job identifier", + "type": "string" + }, + "status": { + "description": "initial status (pending)", + "type": "string" + }, + "url": { + "description": "URL to poll for job status", + "type": "string" + } + } + }, + "schema.JobStatus": { + "type": "string", + "enum": [ + "pending", + "running", + "completed", + "failed", + 
"cancelled" + ], + "x-enum-varnames": [ + "JobStatusPending", + "JobStatusRunning", + "JobStatusCompleted", + "JobStatusFailed", + "JobStatusCancelled" + ] + }, + "schema.JobTrace": { + "type": "object", + "properties": { + "arguments": { + "description": "Tool arguments or result data", + "type": "object", + "additionalProperties": true + }, + "content": { + "description": "The actual trace content", + "type": "string" + }, + "timestamp": { + "description": "When this trace occurred", + "type": "string" + }, + "tool_name": { + "description": "Tool name (for tool_call/tool_result)", + "type": "string" + }, + "type": { + "description": "\"reasoning\", \"tool_call\", \"tool_result\", \"status\"", + "type": "string" + } + } + }, "schema.LogprobContent": { "type": "object", "properties": { @@ -1961,6 +3008,26 @@ const docTemplate = `{ } } }, + "schema.MultimediaSourceConfig": { + "type": "object", + "properties": { + "headers": { + "description": "Custom headers for HTTP request (e.g., Authorization)", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "type": { + "description": "\"image\", \"video\", \"audio\", \"file\"", + "type": "string" + }, + "url": { + "description": "URL to fetch from", + "type": "string" + } + } + }, "schema.NodeData": { "type": "object", "properties": { @@ -2835,12 +3902,14 @@ const docTemplate = `{ "type": "object", "properties": { "backends": { + "description": "available backend engines", "type": "array", "items": { "type": "string" } }, "loaded_models": { + "description": "currently loaded models", "type": "array", "items": { "$ref": "#/definitions/schema.SysInfoModel" @@ -2853,6 +3922,7 @@ const docTemplate = `{ "type": "object", "properties": { "backend": { + "description": "backend engine override", "type": "string" }, "input": { @@ -2884,10 +3954,71 @@ const docTemplate = `{ } } }, + "schema.Task": { + "type": "object", + "properties": { + "created_at": { + "type": "string" + }, + "cron": { + "description": 
"Optional cron expression", + "type": "string" + }, + "cron_parameters": { + "description": "Parameters to use when executing cron jobs", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "description": { + "description": "Optional description", + "type": "string" + }, + "enabled": { + "description": "Can be disabled without deletion", + "type": "boolean" + }, + "id": { + "description": "UUID", + "type": "string" + }, + "model": { + "description": "Model name (must have MCP config)", + "type": "string" + }, + "multimedia_sources": { + "description": "Multimedia sources (for cron jobs)\nURLs to fetch multimedia content from when cron job executes\nEach source can have custom headers for authentication/authorization", + "type": "array", + "items": { + "$ref": "#/definitions/schema.MultimediaSourceConfig" + } + }, + "name": { + "description": "User-friendly name", + "type": "string" + }, + "prompt": { + "description": "Template prompt (supports {{.param}} syntax)", + "type": "string" + }, + "updated_at": { + "type": "string" + }, + "webhooks": { + "description": "Webhook configuration (for notifications)\nSupport multiple webhook endpoints\nWebhooks can handle both success and failure cases using template variables:\n- {{.Job}} - Job object with all fields\n- {{.Task}} - Task object\n- {{.Result}} - Job result (if successful)\n- {{.Error}} - Error message (if failed, empty string if successful)\n- {{.Status}} - Job status string", + "type": "array", + "items": { + "$ref": "#/definitions/schema.WebhookConfig" + } + } + } + }, "schema.TokenizeRequest": { "type": "object", "properties": { "content": { + "description": "text to tokenize", "type": "string" }, "model": { @@ -2899,6 +4030,7 @@ const docTemplate = `{ "type": "object", "properties": { "tokens": { + "description": "token IDs", "type": "array", "items": { "type": "integer" @@ -2928,7 +4060,7 @@ const docTemplate = `{ "type": "object", "properties": { "audio": { - "description": "model 
name or full path", + "description": "raw audio samples as float32 PCM", "type": "array", "items": { "type": "number" @@ -2943,55 +4075,94 @@ const docTemplate = `{ "type": "object", "properties": { "cfg_scale": { + "description": "classifier-free guidance scale", "type": "number" }, "end_image": { + "description": "URL or base64 of the last frame", "type": "string" }, "fps": { + "description": "frames per second", "type": "integer" }, "height": { + "description": "output height in pixels", "type": "integer" }, "input_reference": { + "description": "reference image or video URL", "type": "string" }, "model": { "type": "string" }, "negative_prompt": { + "description": "things to avoid in the output", "type": "string" }, "num_frames": { + "description": "total number of frames to generate", "type": "integer" }, "prompt": { + "description": "text description of the video to generate", "type": "string" }, "response_format": { + "description": "output format (url or b64_json)", "type": "string" }, "seconds": { + "description": "duration in seconds (alternative to num_frames)", "type": "string" }, "seed": { + "description": "random seed for reproducibility", "type": "integer" }, "size": { + "description": "WxH shorthand (e.g. 
\"512x512\")", "type": "string" }, "start_image": { + "description": "URL or base64 of the first frame", "type": "string" }, "step": { + "description": "number of diffusion steps", "type": "integer" }, "width": { + "description": "output width in pixels", "type": "integer" } } }, + "schema.WebhookConfig": { + "type": "object", + "properties": { + "headers": { + "description": "Custom headers (e.g., Authorization)", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "method": { + "description": "HTTP method (POST, PUT, PATCH) - default: POST", + "type": "string" + }, + "payload_template": { + "description": "Optional template for payload", + "type": "string" + }, + "url": { + "description": "Webhook endpoint URL", + "type": "string" + } + } + }, "services.GalleryOpStatus": { "type": "object", "properties": { diff --git a/swagger/embed.go b/swagger/embed.go new file mode 100644 index 000000000000..167d1049658d --- /dev/null +++ b/swagger/embed.go @@ -0,0 +1,6 @@ +package swagger + +import _ "embed" + +//go:embed swagger.json +var SwaggerJSON []byte diff --git a/swagger/swagger.json b/swagger/swagger.json index ad759ffcb8b6..0fea89317527 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -15,8 +15,609 @@ }, "basePath": "/", "paths": { + "/api/agent/jobs": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "List agent jobs", + "parameters": [ + { + "type": "string", + "description": "Filter by task ID", + "name": "task_id", + "in": "query" + }, + { + "type": "string", + "description": "Filter by status (pending, running, completed, failed, cancelled)", + "name": "status", + "in": "query" + }, + { + "type": "integer", + "description": "Max number of jobs to return", + "name": "limit", + "in": "query" + }, + { + "type": "string", + "description": "Set to 'true' for admin cross-user listing", + "name": "all_users", + "in": "query" + } + ], + "responses": { + "200": { + "description": 
"jobs", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Job" + } + } + } + } + } + }, + "/api/agent/jobs/execute": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Execute an agent job", + "parameters": [ + { + "description": "Job execution request", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.JobExecutionRequest" + } + } + ], + "responses": { + "201": { + "description": "job created", + "schema": { + "$ref": "#/definitions/schema.JobExecutionResponse" + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/jobs/{id}": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Get an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "job", + "schema": { + "$ref": "#/definitions/schema.Job" + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "delete": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Delete an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/jobs/{id}/cancel": { + "post": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": 
"Cancel an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "List agent tasks", + "parameters": [ + { + "type": "string", + "description": "Set to 'true' for admin cross-user listing", + "name": "all_users", + "in": "query" + } + ], + "responses": { + "200": { + "description": "tasks", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Task" + } + } + } + } + }, + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Create a new agent task", + "parameters": [ + { + "description": "Task definition", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.Task" + } + } + ], + "responses": { + "201": { + "description": "id", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks/{id}": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Get an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "task", + 
"schema": { + "$ref": "#/definitions/schema.Task" + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "put": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Update an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + }, + { + "description": "Updated task definition", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.Task" + } + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "delete": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Delete an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks/{name}/execute": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Execute an agent task by name", + "parameters": [ + { + "type": "string", + "description": "Task name", + "name": "name", + "in": "path", + "required": true + }, + { + "description": "Optional 
template parameters", + "name": "parameters", + "in": "body", + "schema": { + "type": "object" + } + } + ], + "responses": { + "201": { + "description": "job created", + "schema": { + "$ref": "#/definitions/schema.JobExecutionResponse" + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/models/config-json/{name}": { + "patch": { + "description": "Deep-merges the JSON patch body into the existing model config", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Partially update a model configuration", + "parameters": [ + { + "type": "string", + "description": "Model name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "success message", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + } + }, + "/api/models/config-metadata": { + "get": { + "description": "Returns ~170 config fields with types, UI hints, sections, and options", + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "List all model configuration field metadata", + "responses": { + "200": { + "description": "List of field metadata", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/meta.FieldMeta" + } + } + } + } + } + }, + "/api/models/config-metadata/autocomplete/{provider}": { + "get": { + "description": "Returns runtime-resolved values for dynamic providers (backends, models)", + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Get dynamic autocomplete values for a config field", + "parameters": [ + { + "type": "string", + "description": "Provider name (backends, models, models:chat, models:tts, models:transcript, 
models:vad)", + "name": "provider", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "values array", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + } + }, + "/api/models/vram-estimate": { + "post": { + "description": "Estimates VRAM based on model weight files, context size, and GPU layers", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Estimate VRAM usage for a model", + "parameters": [ + { + "description": "VRAM estimation parameters", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/localai.vramEstimateRequest" + } + } + ], + "responses": { + "200": { + "description": "VRAM estimate", + "schema": { + "$ref": "#/definitions/localai.vramEstimateResponse" + } + } + } + } + }, "/api/p2p": { "get": { + "tags": [ + "p2p" + ], "summary": "Returns available P2P nodes", "responses": { "200": { @@ -33,6 +634,9 @@ }, "/api/p2p/token": { "get": { + "tags": [ + "p2p" + ], "summary": "Show the P2P token", "responses": { "200": { @@ -46,6 +650,9 @@ }, "/backend/monitor": { "get": { + "tags": [ + "monitoring" + ], "summary": "Backend monitor endpoint", "parameters": [ { @@ -70,7 +677,10 @@ }, "/backend/shutdown": { "post": { - "summary": "Backend monitor endpoint", + "tags": [ + "monitoring" + ], + "summary": "Backend shutdown endpoint", "parameters": [ { "description": "Backend statistics request", @@ -87,6 +697,9 @@ }, "/backends": { "get": { + "tags": [ + "backends" + ], "summary": "List all Backends", "responses": { "200": { @@ -103,6 +716,9 @@ }, "/backends/apply": { "post": { + "tags": [ + "backends" + ], "summary": "Install backends to LocalAI.", "parameters": [ { @@ -127,6 +743,9 @@ }, "/backends/available": { "get": { + "tags": [ + "backends" + ], "summary": "List all available Backends", "responses": { "200": { @@ -143,6 +762,9 @@ }, "/backends/delete/{name}": { "post": { 
+ "tags": [ + "backends" + ], "summary": "delete backends from LocalAI.", "parameters": [ { @@ -165,6 +787,9 @@ }, "/backends/galleries": { "get": { + "tags": [ + "backends" + ], "summary": "List all Galleries", "responses": { "200": { @@ -181,6 +806,9 @@ }, "/backends/jobs": { "get": { + "tags": [ + "backends" + ], "summary": "Returns all the jobs status progress", "responses": { "200": { @@ -197,6 +825,9 @@ }, "/backends/jobs/{uuid}": { "get": { + "tags": [ + "backends" + ], "summary": "Returns the job status", "responses": { "200": { @@ -210,23 +841,28 @@ }, "/metrics": { "get": { + "produces": [ + "text/plain" + ], + "tags": [ + "monitoring" + ], "summary": "Prometheus metrics endpoint", - "parameters": [ - { - "description": "Gallery details", - "name": "request", - "in": "body", - "required": true, + "responses": { + "200": { + "description": "Prometheus metrics", "schema": { - "$ref": "#/definitions/config.Gallery" + "type": "string" } } - ], - "responses": {} + } } }, "/models/apply": { "post": { + "tags": [ + "models" + ], "summary": "Install models to LocalAI.", "parameters": [ { @@ -251,6 +887,9 @@ }, "/models/available": { "get": { + "tags": [ + "models" + ], "summary": "List installable models.", "responses": { "200": { @@ -258,7 +897,7 @@ "schema": { "type": "array", "items": { - "$ref": "#/definitions/gallery.GalleryModel" + "$ref": "#/definitions/gallery.Metadata" } } } @@ -267,6 +906,9 @@ }, "/models/delete/{name}": { "post": { + "tags": [ + "models" + ], "summary": "delete models to LocalAI.", "parameters": [ { @@ -289,6 +931,9 @@ }, "/models/galleries": { "get": { + "tags": [ + "models" + ], "summary": "List all Galleries", "responses": { "200": { @@ -305,6 +950,9 @@ }, "/models/jobs": { "get": { + "tags": [ + "models" + ], "summary": "Returns all the jobs status progress", "responses": { "200": { @@ -321,6 +969,9 @@ }, "/models/jobs/{uuid}": { "get": { + "tags": [ + "models" + ], "summary": "Returns the job status", "responses": { "200": { @@ 
-334,6 +985,9 @@ }, "/system": { "get": { + "tags": [ + "monitoring" + ], "summary": "Show the LocalAI instance information", "responses": { "200": { @@ -353,6 +1007,9 @@ "produces": [ "audio/x-wav" ], + "tags": [ + "tokenize" + ], "summary": "Get TokenMetrics for Active Slot.", "responses": { "200": { @@ -372,6 +1029,9 @@ "produces": [ "audio/x-wav" ], + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -402,6 +1062,9 @@ "produces": [ "audio/x-wav" ], + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -429,6 +1092,9 @@ "consumes": [ "multipart/form-data" ], + "tags": [ + "audio" + ], "summary": "Transcribes audio into the input language.", "parameters": [ { @@ -461,6 +1127,9 @@ }, "/v1/chat/completions": { "post": { + "tags": [ + "inference" + ], "summary": "Generate a chat completions for a given prompt and model.", "parameters": [ { @@ -485,6 +1154,9 @@ }, "/v1/completions": { "post": { + "tags": [ + "inference" + ], "summary": "Generate completions for a given prompt and model.", "parameters": [ { @@ -509,6 +1181,9 @@ }, "/v1/detection": { "post": { + "tags": [ + "detection" + ], "summary": "Detects objects in the input image.", "parameters": [ { @@ -533,6 +1208,9 @@ }, "/v1/edits": { "post": { + "tags": [ + "inference" + ], "summary": "OpenAI edit endpoint", "parameters": [ { @@ -557,6 +1235,9 @@ }, "/v1/embeddings": { "post": { + "tags": [ + "embeddings" + ], "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.", "parameters": [ { @@ -581,6 +1262,9 @@ }, "/v1/images/generations": { "post": { + "tags": [ + "images" + ], "summary": "Creates an image given a prompt.", "parameters": [ { @@ -682,6 +1366,9 @@ }, "/v1/mcp/chat/completions": { "post": { + "tags": [ + "mcp" + ], "summary": "MCP chat completions with automatic tool execution", "parameters": [ { @@ -706,6 +1393,9 @@ }, "/v1/messages": { 
"post": { + "tags": [ + "inference" + ], "summary": "Generate a message response for the given messages and model.", "parameters": [ { @@ -730,6 +1420,9 @@ }, "/v1/models": { "get": { + "tags": [ + "models" + ], "summary": "List and describe the various models available in the API.", "responses": { "200": { @@ -743,6 +1436,9 @@ }, "/v1/rerank": { "post": { + "tags": [ + "rerank" + ], "summary": "Reranks a list of phrases by relevance to a given text query.", "parameters": [ { @@ -767,6 +1463,9 @@ }, "/v1/responses": { "post": { + "tags": [ + "inference" + ], "summary": "Create a response using the Open Responses API", "parameters": [ { @@ -792,6 +1491,9 @@ "/v1/responses/{id}": { "get": { "description": "Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.", + "tags": [ + "inference" + ], "summary": "Get a response by ID", "parameters": [ { @@ -841,6 +1543,9 @@ "/v1/responses/{id}/cancel": { "post": { "description": "Cancel a background response if it's still in progress", + "tags": [ + "inference" + ], "summary": "Cancel a response", "parameters": [ { @@ -877,6 +1582,9 @@ }, "/v1/sound-generation": { "post": { + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -901,6 +1609,9 @@ }, "/v1/text-to-speech/{voice-id}": { "post": { + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -938,6 +1649,9 @@ "produces": [ "audio/x-wav" ], + "tags": [ + "tokenize" + ], "summary": "Get TokenMetrics for Active Slot.", "responses": { "200": { @@ -951,6 +1665,9 @@ }, "/v1/tokenize": { "post": { + "tags": [ + "tokenize" + ], "summary": "Tokenize the input.", "parameters": [ { @@ -978,6 +1695,9 @@ "consumes": [ "application/json" ], + "tags": [ + "audio" + ], "summary": "Detect voice fragments in an audio stream", "parameters": [ { @@ -1002,6 +1722,9 @@ }, "/video": { "post": { + "tags": [ + "video" + ], "summary": "Creates a video given a 
prompt.", "parameters": [ { @@ -1190,7 +1913,74 @@ } } }, - "gallery.GalleryModel": { + "gallery.Metadata": { + "type": "object", + "properties": { + "backend": { + "description": "Backend is the resolved backend engine for this model (e.g. \"llama-cpp\").\nPopulated at load time from overrides, inline config, or the URL-referenced config file.", + "type": "string" + }, + "description": { + "type": "string" + }, + "files": { + "description": "AdditionalFiles are used to add additional files to the model", + "type": "array", + "items": { + "$ref": "#/definitions/gallery.File" + } + }, + "gallery": { + "description": "Gallery is a reference to the gallery which contains the model", + "allOf": [ + { + "$ref": "#/definitions/config.Gallery" + } + ] + }, + "icon": { + "type": "string" + }, + "installed": { + "description": "Installed is used to indicate if the model is installed or not", + "type": "boolean" + }, + "license": { + "type": "string" + }, + "name": { + "type": "string" + }, + "size": { + "description": "Size is an optional hardcoded model size string (e.g. 
\"500MB\", \"14.5GB\").\nUsed when the size cannot be estimated automatically.", + "type": "string" + }, + "tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "url": { + "type": "string" + }, + "urls": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "localai.GalleryBackend": { + "type": "object", + "properties": { + "id": { + "type": "string" + } + } + }, + "localai.GalleryModel": { "type": "object", "properties": { "backend": { @@ -1223,6 +2013,9 @@ "icon": { "type": "string" }, + "id": { + "type": "string" + }, "installed": { "description": "Installed is used to indicate if the model is installed or not", "type": "boolean" @@ -1259,83 +2052,137 @@ } } }, - "localai.GalleryBackend": { + "localai.vramEstimateRequest": { "type": "object", "properties": { - "id": { + "context_size": { + "description": "context length to estimate for (default 8192)", + "type": "integer" + }, + "gpu_layers": { + "description": "number of layers to offload to GPU (0 = all)", + "type": "integer" + }, + "kv_quant_bits": { + "description": "KV cache quantization bits (0 = fp16)", + "type": "integer" + }, + "model": { + "description": "model name (must be installed)", "type": "string" } } }, - "localai.GalleryModel": { + "localai.vramEstimateResponse": { "type": "object", "properties": { - "backend": { - "description": "Backend is the resolved backend engine for this model (e.g. 
\"llama-cpp\").\nPopulated at load time from overrides, inline config, or the URL-referenced config file.", + "context_note": { + "description": "note when context_size was defaulted", "type": "string" }, - "config_file": { - "description": "config_file is read in the situation where URL is blank - and therefore this is a base config.", - "type": "object", - "additionalProperties": true + "model_max_context": { + "description": "model's trained maximum context length", + "type": "integer" + }, + "sizeBytes": { + "description": "total model weight size in bytes", + "type": "integer" + }, + "sizeDisplay": { + "description": "human-readable size (e.g. \"4.2 GB\")", + "type": "string" + }, + "vrambytes": { + "description": "estimated VRAM usage in bytes", + "type": "integer" + }, + "vramdisplay": { + "description": "human-readable VRAM (e.g. \"6.1 GB\")", + "type": "string" + } + } + }, + "meta.FieldMeta": { + "type": "object", + "properties": { + "advanced": { + "type": "boolean" + }, + "autocomplete_provider": { + "description": "\"backends\", \"models:chat\", etc.", + "type": "string" }, + "component": { + "description": "\"input\", \"number\", \"toggle\", \"select\", \"slider\", etc.", + "type": "string" + }, + "default": {}, "description": { + "description": "help text", + "type": "string" + }, + "go_type": { + "description": "\"*int\", \"string\", \"[]string\"", "type": "string" }, - "files": { - "description": "AdditionalFiles are used to add additional files to the model", + "label": { + "description": "human-readable label", + "type": "string" + }, + "max": { + "type": "number" + }, + "min": { + "type": "number" + }, + "options": { "type": "array", "items": { - "$ref": "#/definitions/gallery.File" + "$ref": "#/definitions/meta.FieldOption" } }, - "gallery": { - "description": "Gallery is a reference to the gallery which contains the model", - "allOf": [ - { - "$ref": "#/definitions/config.Gallery" - } - ] + "order": { + "type": "integer" }, - "icon": { + 
"path": { + "description": "dot-path: \"context_size\", \"function.grammar.parallel_calls\"", "type": "string" }, - "id": { + "placeholder": { "type": "string" }, - "installed": { - "description": "Installed is used to indicate if the model is installed or not", + "pointer": { + "description": "true = nil means \"not set\"", "type": "boolean" }, - "license": { - "type": "string" - }, - "name": { + "section": { + "description": "\"general\", \"llm\", \"templates\", etc.", "type": "string" }, - "overrides": { - "description": "Overrides are used to override the configuration of the model located at URL", - "type": "object", - "additionalProperties": true + "step": { + "type": "number" }, - "size": { - "description": "Size is an optional hardcoded model size string (e.g. \"500MB\", \"14.5GB\").\nUsed when the size cannot be estimated automatically.", + "ui_type": { + "description": "\"string\", \"int\", \"float\", \"bool\", \"[]string\", \"map\", \"object\"", "type": "string" }, - "tags": { - "type": "array", - "items": { - "type": "string" - } + "vram_impact": { + "type": "boolean" }, - "url": { + "yaml_key": { + "description": "leaf yaml key", + "type": "string" + } + } + }, + "meta.FieldOption": { + "type": "object", + "properties": { + "label": { "type": "string" }, - "urls": { - "type": "array", - "items": { - "type": "string" - } + "value": { + "type": "string" } } }, @@ -1632,6 +2479,7 @@ "type": "object", "properties": { "image": { + "description": "URL or base64-encoded image to analyze", "type": "string" }, "model": { @@ -1841,6 +2689,205 @@ } } }, + "schema.Job": { + "type": "object", + "properties": { + "audios": { + "description": "List of audio URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "completed_at": { + "type": "string" + }, + "created_at": { + "type": "string" + }, + "error": { + "description": "Error message if failed", + "type": "string" + }, + "files": { + "description": "List of file URLs or base64 
strings", + "type": "array", + "items": { + "type": "string" + } + }, + "id": { + "description": "UUID", + "type": "string" + }, + "images": { + "description": "Multimedia content (for manual execution)\nCan contain URLs or base64-encoded data URIs", + "type": "array", + "items": { + "type": "string" + } + }, + "parameters": { + "description": "Template parameters", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "result": { + "description": "Agent response", + "type": "string" + }, + "started_at": { + "type": "string" + }, + "status": { + "description": "pending, running, completed, failed, cancelled", + "allOf": [ + { + "$ref": "#/definitions/schema.JobStatus" + } + ] + }, + "task_id": { + "description": "Reference to Task", + "type": "string" + }, + "traces": { + "description": "Execution traces (reasoning, tool calls, tool results)", + "type": "array", + "items": { + "$ref": "#/definitions/schema.JobTrace" + } + }, + "triggered_by": { + "description": "\"manual\", \"cron\", \"api\"", + "type": "string" + }, + "videos": { + "description": "List of video URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "webhook_error": { + "description": "Error if webhook failed", + "type": "string" + }, + "webhook_sent": { + "description": "Webhook delivery tracking", + "type": "boolean" + }, + "webhook_sent_at": { + "type": "string" + } + } + }, + "schema.JobExecutionRequest": { + "type": "object", + "properties": { + "audios": { + "description": "List of audio URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "files": { + "description": "List of file URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "images": { + "description": "Multimedia content (optional, for manual execution)\nCan contain URLs or base64-encoded data URIs", + "type": "array", + "items": { + "type": "string" + } + }, + "parameters": { + "description": "Optional, for 
templating", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "task_id": { + "description": "Required", + "type": "string" + }, + "videos": { + "description": "List of video URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "schema.JobExecutionResponse": { + "type": "object", + "properties": { + "job_id": { + "description": "unique job identifier", + "type": "string" + }, + "status": { + "description": "initial status (pending)", + "type": "string" + }, + "url": { + "description": "URL to poll for job status", + "type": "string" + } + } + }, + "schema.JobStatus": { + "type": "string", + "enum": [ + "pending", + "running", + "completed", + "failed", + "cancelled" + ], + "x-enum-varnames": [ + "JobStatusPending", + "JobStatusRunning", + "JobStatusCompleted", + "JobStatusFailed", + "JobStatusCancelled" + ] + }, + "schema.JobTrace": { + "type": "object", + "properties": { + "arguments": { + "description": "Tool arguments or result data", + "type": "object", + "additionalProperties": true + }, + "content": { + "description": "The actual trace content", + "type": "string" + }, + "timestamp": { + "description": "When this trace occurred", + "type": "string" + }, + "tool_name": { + "description": "Tool name (for tool_call/tool_result)", + "type": "string" + }, + "type": { + "description": "\"reasoning\", \"tool_call\", \"tool_result\", \"status\"", + "type": "string" + } + } + }, "schema.LogprobContent": { "type": "object", "properties": { @@ -1954,6 +3001,26 @@ } } }, + "schema.MultimediaSourceConfig": { + "type": "object", + "properties": { + "headers": { + "description": "Custom headers for HTTP request (e.g., Authorization)", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "type": { + "description": "\"image\", \"video\", \"audio\", \"file\"", + "type": "string" + }, + "url": { + "description": "URL to fetch from", + "type": "string" + } + } + }, "schema.NodeData": { 
"type": "object", "properties": { @@ -2828,12 +3895,14 @@ "type": "object", "properties": { "backends": { + "description": "available backend engines", "type": "array", "items": { "type": "string" } }, "loaded_models": { + "description": "currently loaded models", "type": "array", "items": { "$ref": "#/definitions/schema.SysInfoModel" @@ -2846,6 +3915,7 @@ "type": "object", "properties": { "backend": { + "description": "backend engine override", "type": "string" }, "input": { @@ -2877,10 +3947,71 @@ } } }, + "schema.Task": { + "type": "object", + "properties": { + "created_at": { + "type": "string" + }, + "cron": { + "description": "Optional cron expression", + "type": "string" + }, + "cron_parameters": { + "description": "Parameters to use when executing cron jobs", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "description": { + "description": "Optional description", + "type": "string" + }, + "enabled": { + "description": "Can be disabled without deletion", + "type": "boolean" + }, + "id": { + "description": "UUID", + "type": "string" + }, + "model": { + "description": "Model name (must have MCP config)", + "type": "string" + }, + "multimedia_sources": { + "description": "Multimedia sources (for cron jobs)\nURLs to fetch multimedia content from when cron job executes\nEach source can have custom headers for authentication/authorization", + "type": "array", + "items": { + "$ref": "#/definitions/schema.MultimediaSourceConfig" + } + }, + "name": { + "description": "User-friendly name", + "type": "string" + }, + "prompt": { + "description": "Template prompt (supports {{.param}} syntax)", + "type": "string" + }, + "updated_at": { + "type": "string" + }, + "webhooks": { + "description": "Webhook configuration (for notifications)\nSupport multiple webhook endpoints\nWebhooks can handle both success and failure cases using template variables:\n- {{.Job}} - Job object with all fields\n- {{.Task}} - Task object\n- {{.Result}} - Job result 
(if successful)\n- {{.Error}} - Error message (if failed, empty string if successful)\n- {{.Status}} - Job status string", + "type": "array", + "items": { + "$ref": "#/definitions/schema.WebhookConfig" + } + } + } + }, "schema.TokenizeRequest": { "type": "object", "properties": { "content": { + "description": "text to tokenize", "type": "string" }, "model": { @@ -2892,6 +4023,7 @@ "type": "object", "properties": { "tokens": { + "description": "token IDs", "type": "array", "items": { "type": "integer" @@ -2921,7 +4053,7 @@ "type": "object", "properties": { "audio": { - "description": "model name or full path", + "description": "raw audio samples as float32 PCM", "type": "array", "items": { "type": "number" @@ -2936,55 +4068,94 @@ "type": "object", "properties": { "cfg_scale": { + "description": "classifier-free guidance scale", "type": "number" }, "end_image": { + "description": "URL or base64 of the last frame", "type": "string" }, "fps": { + "description": "frames per second", "type": "integer" }, "height": { + "description": "output height in pixels", "type": "integer" }, "input_reference": { + "description": "reference image or video URL", "type": "string" }, "model": { "type": "string" }, "negative_prompt": { + "description": "things to avoid in the output", "type": "string" }, "num_frames": { + "description": "total number of frames to generate", "type": "integer" }, "prompt": { + "description": "text description of the video to generate", "type": "string" }, "response_format": { + "description": "output format (url or b64_json)", "type": "string" }, "seconds": { + "description": "duration in seconds (alternative to num_frames)", "type": "string" }, "seed": { + "description": "random seed for reproducibility", "type": "integer" }, "size": { + "description": "WxH shorthand (e.g. 
\"512x512\")", "type": "string" }, "start_image": { + "description": "URL or base64 of the first frame", "type": "string" }, "step": { + "description": "number of diffusion steps", "type": "integer" }, "width": { + "description": "output width in pixels", "type": "integer" } } }, + "schema.WebhookConfig": { + "type": "object", + "properties": { + "headers": { + "description": "Custom headers (e.g., Authorization)", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "method": { + "description": "HTTP method (POST, PUT, PATCH) - default: POST", + "type": "string" + }, + "payload_template": { + "description": "Optional template for payload", + "type": "string" + }, + "url": { + "description": "Webhook endpoint URL", + "type": "string" + } + } + }, "services.GalleryOpStatus": { "type": "object", "properties": { diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 974b5b0b4ac5..c327710c87ad 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -112,18 +112,13 @@ definitions: type: string type: array type: object - gallery.GalleryModel: + gallery.Metadata: properties: backend: description: |- Backend is the resolved backend engine for this model (e.g. "llama-cpp"). Populated at load time from overrides, inline config, or the URL-referenced config file. type: string - config_file: - additionalProperties: true - description: config_file is read in the situation where URL is blank - and - therefore this is a base config. - type: object description: type: string files: @@ -144,11 +139,6 @@ definitions: type: string name: type: string - overrides: - additionalProperties: true - description: Overrides are used to override the configuration of the model - located at URL - type: object size: description: |- Size is an optional hardcoded model size string (e.g. "500MB", "14.5GB"). 
@@ -225,6 +215,101 @@ definitions: type: string type: array type: object + localai.vramEstimateRequest: + properties: + context_size: + description: context length to estimate for (default 8192) + type: integer + gpu_layers: + description: number of layers to offload to GPU (0 = all) + type: integer + kv_quant_bits: + description: KV cache quantization bits (0 = fp16) + type: integer + model: + description: model name (must be installed) + type: string + type: object + localai.vramEstimateResponse: + properties: + context_note: + description: note when context_size was defaulted + type: string + model_max_context: + description: model's trained maximum context length + type: integer + sizeBytes: + description: total model weight size in bytes + type: integer + sizeDisplay: + description: human-readable size (e.g. "4.2 GB") + type: string + vrambytes: + description: estimated VRAM usage in bytes + type: integer + vramdisplay: + description: human-readable VRAM (e.g. "6.1 GB") + type: string + type: object + meta.FieldMeta: + properties: + advanced: + type: boolean + autocomplete_provider: + description: '"backends", "models:chat", etc.' + type: string + component: + description: '"input", "number", "toggle", "select", "slider", etc.' + type: string + default: {} + description: + description: help text + type: string + go_type: + description: '"*int", "string", "[]string"' + type: string + label: + description: human-readable label + type: string + max: + type: number + min: + type: number + options: + items: + $ref: '#/definitions/meta.FieldOption' + type: array + order: + type: integer + path: + description: 'dot-path: "context_size", "function.grammar.parallel_calls"' + type: string + placeholder: + type: string + pointer: + description: true = nil means "not set" + type: boolean + section: + description: '"general", "llm", "templates", etc.' 
+ type: string + step: + type: number + ui_type: + description: '"string", "int", "float", "bool", "[]string", "map", "object"' + type: string + vram_impact: + type: boolean + yaml_key: + description: leaf yaml key + type: string + type: object + meta.FieldOption: + properties: + label: + type: string + value: + type: string + type: object proto.MemoryUsageData: properties: breakdown: @@ -420,6 +505,7 @@ definitions: schema.DetectionRequest: properties: image: + description: URL or base64-encoded image to analyze type: string model: type: string @@ -557,6 +643,152 @@ definitions: total_tokens: type: integer type: object + schema.Job: + properties: + audios: + description: List of audio URLs or base64 strings + items: + type: string + type: array + completed_at: + type: string + created_at: + type: string + error: + description: Error message if failed + type: string + files: + description: List of file URLs or base64 strings + items: + type: string + type: array + id: + description: UUID + type: string + images: + description: |- + Multimedia content (for manual execution) + Can contain URLs or base64-encoded data URIs + items: + type: string + type: array + parameters: + additionalProperties: + type: string + description: Template parameters + type: object + result: + description: Agent response + type: string + started_at: + type: string + status: + allOf: + - $ref: '#/definitions/schema.JobStatus' + description: pending, running, completed, failed, cancelled + task_id: + description: Reference to Task + type: string + traces: + description: Execution traces (reasoning, tool calls, tool results) + items: + $ref: '#/definitions/schema.JobTrace' + type: array + triggered_by: + description: '"manual", "cron", "api"' + type: string + videos: + description: List of video URLs or base64 strings + items: + type: string + type: array + webhook_error: + description: Error if webhook failed + type: string + webhook_sent: + description: Webhook delivery tracking + type: 
boolean + webhook_sent_at: + type: string + type: object + schema.JobExecutionRequest: + properties: + audios: + description: List of audio URLs or base64 strings + items: + type: string + type: array + files: + description: List of file URLs or base64 strings + items: + type: string + type: array + images: + description: |- + Multimedia content (optional, for manual execution) + Can contain URLs or base64-encoded data URIs + items: + type: string + type: array + parameters: + additionalProperties: + type: string + description: Optional, for templating + type: object + task_id: + description: Required + type: string + videos: + description: List of video URLs or base64 strings + items: + type: string + type: array + type: object + schema.JobExecutionResponse: + properties: + job_id: + description: unique job identifier + type: string + status: + description: initial status (pending) + type: string + url: + description: URL to poll for job status + type: string + type: object + schema.JobStatus: + enum: + - pending + - running + - completed + - failed + - cancelled + type: string + x-enum-varnames: + - JobStatusPending + - JobStatusRunning + - JobStatusCompleted + - JobStatusFailed + - JobStatusCancelled + schema.JobTrace: + properties: + arguments: + additionalProperties: true + description: Tool arguments or result data + type: object + content: + description: The actual trace content + type: string + timestamp: + description: When this trace occurred + type: string + tool_name: + description: Tool name (for tool_call/tool_result) + type: string + type: + description: '"reasoning", "tool_call", "tool_result", "status"' + type: string + type: object schema.LogprobContent: properties: bytes: @@ -632,6 +864,20 @@ definitions: object: type: string type: object + schema.MultimediaSourceConfig: + properties: + headers: + additionalProperties: + type: string + description: Custom headers for HTTP request (e.g., Authorization) + type: object + type: + description: 
'"image", "video", "audio", "file"' + type: string + url: + description: URL to fetch from + type: string + type: object schema.NodeData: properties: id: @@ -1226,10 +1472,12 @@ definitions: schema.SystemInformationResponse: properties: backends: + description: available backend engines items: type: string type: array loaded_models: + description: currently loaded models items: $ref: '#/definitions/schema.SysInfoModel' type: array @@ -1238,6 +1486,7 @@ definitions: description: TTS request body properties: backend: + description: backend engine override type: string input: description: text input @@ -1260,9 +1509,64 @@ definitions: description: voice audio file or speaker id type: string type: object + schema.Task: + properties: + created_at: + type: string + cron: + description: Optional cron expression + type: string + cron_parameters: + additionalProperties: + type: string + description: Parameters to use when executing cron jobs + type: object + description: + description: Optional description + type: string + enabled: + description: Can be disabled without deletion + type: boolean + id: + description: UUID + type: string + model: + description: Model name (must have MCP config) + type: string + multimedia_sources: + description: |- + Multimedia sources (for cron jobs) + URLs to fetch multimedia content from when cron job executes + Each source can have custom headers for authentication/authorization + items: + $ref: '#/definitions/schema.MultimediaSourceConfig' + type: array + name: + description: User-friendly name + type: string + prompt: + description: Template prompt (supports {{.param}} syntax) + type: string + updated_at: + type: string + webhooks: + description: |- + Webhook configuration (for notifications) + Support multiple webhook endpoints + Webhooks can handle both success and failure cases using template variables: + - {{.Job}} - Job object with all fields + - {{.Task}} - Task object + - {{.Result}} - Job result (if successful) + - {{.Error}} - 
Error message (if failed, empty string if successful) + - {{.Status}} - Job status string + items: + $ref: '#/definitions/schema.WebhookConfig' + type: array + type: object schema.TokenizeRequest: properties: content: + description: text to tokenize type: string model: type: string @@ -1270,6 +1574,7 @@ definitions: schema.TokenizeResponse: properties: tokens: + description: token IDs items: type: integer type: array @@ -1289,7 +1594,7 @@ definitions: description: VAD request body properties: audio: - description: model name or full path + description: raw audio samples as float32 PCM items: type: number type: array @@ -1299,38 +1604,70 @@ definitions: schema.VideoRequest: properties: cfg_scale: + description: classifier-free guidance scale type: number end_image: + description: URL or base64 of the last frame type: string fps: + description: frames per second type: integer height: + description: output height in pixels type: integer input_reference: + description: reference image or video URL type: string model: type: string negative_prompt: + description: things to avoid in the output type: string num_frames: + description: total number of frames to generate type: integer prompt: + description: text description of the video to generate type: string response_format: + description: output format (url or b64_json) type: string seconds: + description: duration in seconds (alternative to num_frames) type: string seed: + description: random seed for reproducibility type: integer size: + description: WxH shorthand (e.g. 
"512x512") type: string start_image: + description: URL or base64 of the first frame type: string step: + description: number of diffusion steps type: integer width: + description: output width in pixels type: integer type: object + schema.WebhookConfig: + properties: + headers: + additionalProperties: + type: string + description: Custom headers (e.g., Authorization) + type: object + method: + description: 'HTTP method (POST, PUT, PATCH) - default: POST' + type: string + payload_template: + description: Optional template for payload + type: string + url: + description: Webhook endpoint URL + type: string + type: object services.GalleryOpStatus: properties: cancellable: @@ -1369,6 +1706,400 @@ info: title: LocalAI API version: 2.0.0 paths: + /api/agent/jobs: + get: + parameters: + - description: Filter by task ID + in: query + name: task_id + type: string + - description: Filter by status (pending, running, completed, failed, cancelled) + in: query + name: status + type: string + - description: Max number of jobs to return + in: query + name: limit + type: integer + - description: Set to 'true' for admin cross-user listing + in: query + name: all_users + type: string + produces: + - application/json + responses: + "200": + description: jobs + schema: + items: + $ref: '#/definitions/schema.Job' + type: array + summary: List agent jobs + tags: + - agent-jobs + /api/agent/jobs/{id}: + delete: + parameters: + - description: Job ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: message + schema: + additionalProperties: + type: string + type: object + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Delete an agent job + tags: + - agent-jobs + get: + parameters: + - description: Job ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: job + schema: + $ref: 
'#/definitions/schema.Job' + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Get an agent job + tags: + - agent-jobs + /api/agent/jobs/{id}/cancel: + post: + parameters: + - description: Job ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: message + schema: + additionalProperties: + type: string + type: object + "400": + description: error + schema: + additionalProperties: + type: string + type: object + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Cancel an agent job + tags: + - agent-jobs + /api/agent/jobs/execute: + post: + consumes: + - application/json + parameters: + - description: Job execution request + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.JobExecutionRequest' + produces: + - application/json + responses: + "201": + description: job created + schema: + $ref: '#/definitions/schema.JobExecutionResponse' + "400": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Execute an agent job + tags: + - agent-jobs + /api/agent/tasks: + get: + parameters: + - description: Set to 'true' for admin cross-user listing + in: query + name: all_users + type: string + produces: + - application/json + responses: + "200": + description: tasks + schema: + items: + $ref: '#/definitions/schema.Task' + type: array + summary: List agent tasks + tags: + - agent-jobs + post: + consumes: + - application/json + parameters: + - description: Task definition + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.Task' + produces: + - application/json + responses: + "201": + description: id + schema: + additionalProperties: + type: string + type: object + "400": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Create a new agent task + tags: 
+ - agent-jobs + /api/agent/tasks/{id}: + delete: + parameters: + - description: Task ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: message + schema: + additionalProperties: + type: string + type: object + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Delete an agent task + tags: + - agent-jobs + get: + parameters: + - description: Task ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: task + schema: + $ref: '#/definitions/schema.Task' + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Get an agent task + tags: + - agent-jobs + put: + consumes: + - application/json + parameters: + - description: Task ID + in: path + name: id + required: true + type: string + - description: Updated task definition + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.Task' + produces: + - application/json + responses: + "200": + description: message + schema: + additionalProperties: + type: string + type: object + "400": + description: error + schema: + additionalProperties: + type: string + type: object + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Update an agent task + tags: + - agent-jobs + /api/agent/tasks/{name}/execute: + post: + consumes: + - application/json + parameters: + - description: Task name + in: path + name: name + required: true + type: string + - description: Optional template parameters + in: body + name: parameters + schema: + type: object + produces: + - application/json + responses: + "201": + description: job created + schema: + $ref: '#/definitions/schema.JobExecutionResponse' + "400": + description: error + schema: + additionalProperties: + type: string + type: object + "404": + description: error + 
schema: + additionalProperties: + type: string + type: object + summary: Execute an agent task by name + tags: + - agent-jobs + /api/models/config-json/{name}: + patch: + consumes: + - application/json + description: Deep-merges the JSON patch body into the existing model config + parameters: + - description: Model name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: success message + schema: + additionalProperties: true + type: object + summary: Partially update a model configuration + tags: + - config + /api/models/config-metadata: + get: + description: Returns ~170 config fields with types, UI hints, sections, and + options + produces: + - application/json + responses: + "200": + description: List of field metadata + schema: + items: + $ref: '#/definitions/meta.FieldMeta' + type: array + summary: List all model configuration field metadata + tags: + - config + /api/models/config-metadata/autocomplete/{provider}: + get: + description: Returns runtime-resolved values for dynamic providers (backends, + models) + parameters: + - description: Provider name (backends, models, models:chat, models:tts, models:transcript, + models:vad) + in: path + name: provider + required: true + type: string + produces: + - application/json + responses: + "200": + description: values array + schema: + additionalProperties: true + type: object + summary: Get dynamic autocomplete values for a config field + tags: + - config + /api/models/vram-estimate: + post: + consumes: + - application/json + description: Estimates VRAM based on model weight files, context size, and GPU + layers + parameters: + - description: VRAM estimation parameters + in: body + name: request + required: true + schema: + $ref: '#/definitions/localai.vramEstimateRequest' + produces: + - application/json + responses: + "200": + description: VRAM estimate + schema: + $ref: '#/definitions/localai.vramEstimateResponse' + summary: Estimate VRAM 
usage for a model + tags: + - config /api/p2p: get: responses: @@ -1379,6 +2110,8 @@ paths: $ref: '#/definitions/schema.P2PNodesResponse' type: array summary: Returns available P2P nodes + tags: + - p2p /api/p2p/token: get: responses: @@ -1387,6 +2120,8 @@ paths: schema: type: string summary: Show the P2P token + tags: + - p2p /backend/monitor: get: parameters: @@ -1402,6 +2137,8 @@ paths: schema: $ref: '#/definitions/proto.StatusResponse' summary: Backend monitor endpoint + tags: + - monitoring /backend/shutdown: post: parameters: @@ -1412,7 +2149,9 @@ paths: schema: $ref: '#/definitions/schema.BackendMonitorRequest' responses: {} - summary: Backend monitor endpoint + summary: Backend shutdown endpoint + tags: + - monitoring /backends: get: responses: @@ -1423,6 +2162,8 @@ paths: $ref: '#/definitions/gallery.GalleryBackend' type: array summary: List all Backends + tags: + - backends /backends/apply: post: parameters: @@ -1438,6 +2179,8 @@ paths: schema: $ref: '#/definitions/schema.BackendResponse' summary: Install backends to LocalAI. + tags: + - backends /backends/available: get: responses: @@ -1448,6 +2191,8 @@ paths: $ref: '#/definitions/gallery.GalleryBackend' type: array summary: List all available Backends + tags: + - backends /backends/delete/{name}: post: parameters: @@ -1462,6 +2207,8 @@ paths: schema: $ref: '#/definitions/schema.BackendResponse' summary: delete backends from LocalAI. 
+ tags: + - backends /backends/galleries: get: responses: @@ -1472,6 +2219,8 @@ paths: $ref: '#/definitions/config.Gallery' type: array summary: List all Galleries + tags: + - backends /backends/jobs: get: responses: @@ -1482,6 +2231,8 @@ paths: $ref: '#/definitions/services.GalleryOpStatus' type: object summary: Returns all the jobs status progress + tags: + - backends /backends/jobs/{uuid}: get: responses: @@ -1490,17 +2241,20 @@ paths: schema: $ref: '#/definitions/services.GalleryOpStatus' summary: Returns the job status + tags: + - backends /metrics: get: - parameters: - - description: Gallery details - in: body - name: request - required: true - schema: - $ref: '#/definitions/config.Gallery' - responses: {} + produces: + - text/plain + responses: + "200": + description: Prometheus metrics + schema: + type: string summary: Prometheus metrics endpoint + tags: + - monitoring /models/apply: post: parameters: @@ -1516,6 +2270,8 @@ paths: schema: $ref: '#/definitions/schema.GalleryResponse' summary: Install models to LocalAI. + tags: + - models /models/available: get: responses: @@ -1523,9 +2279,11 @@ paths: description: Response schema: items: - $ref: '#/definitions/gallery.GalleryModel' + $ref: '#/definitions/gallery.Metadata' type: array summary: List installable models. + tags: + - models /models/delete/{name}: post: parameters: @@ -1540,6 +2298,8 @@ paths: schema: $ref: '#/definitions/schema.GalleryResponse' summary: delete models to LocalAI. 
+ tags: + - models /models/galleries: get: responses: @@ -1550,6 +2310,8 @@ paths: $ref: '#/definitions/config.Gallery' type: array summary: List all Galleries + tags: + - models /models/jobs: get: responses: @@ -1560,6 +2322,8 @@ paths: $ref: '#/definitions/services.GalleryOpStatus' type: object summary: Returns all the jobs status progress + tags: + - models /models/jobs/{uuid}: get: responses: @@ -1568,6 +2332,8 @@ paths: schema: $ref: '#/definitions/services.GalleryOpStatus' summary: Returns the job status + tags: + - models /system: get: responses: @@ -1576,6 +2342,8 @@ paths: schema: $ref: '#/definitions/schema.SystemInformationResponse' summary: Show the LocalAI instance information + tags: + - monitoring /tokenMetrics: get: consumes: @@ -1588,6 +2356,8 @@ paths: schema: type: string summary: Get TokenMetrics for Active Slot. + tags: + - tokenize /tts: post: consumes: @@ -1607,6 +2377,8 @@ paths: schema: type: string summary: Generates audio from the input text. + tags: + - audio /v1/audio/speech: post: consumes: @@ -1626,6 +2398,8 @@ paths: schema: type: string summary: Generates audio from the input text. + tags: + - audio /v1/audio/transcriptions: post: consumes: @@ -1649,6 +2423,8 @@ paths: type: string type: object summary: Transcribes audio into the input language. + tags: + - audio /v1/chat/completions: post: parameters: @@ -1664,6 +2440,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: Generate a chat completions for a given prompt and model. + tags: + - inference /v1/completions: post: parameters: @@ -1679,6 +2457,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: Generate completions for a given prompt and model. + tags: + - inference /v1/detection: post: parameters: @@ -1694,6 +2474,8 @@ paths: schema: $ref: '#/definitions/schema.DetectionResponse' summary: Detects objects in the input image. 
+ tags: + - detection /v1/edits: post: parameters: @@ -1709,6 +2491,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: OpenAI edit endpoint + tags: + - inference /v1/embeddings: post: parameters: @@ -1725,6 +2509,8 @@ paths: $ref: '#/definitions/schema.OpenAIResponse' summary: Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. + tags: + - embeddings /v1/images/generations: post: parameters: @@ -1740,6 +2526,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: Creates an image given a prompt. + tags: + - images /v1/images/inpainting: post: consumes: @@ -1808,6 +2596,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: MCP chat completions with automatic tool execution + tags: + - mcp /v1/messages: post: parameters: @@ -1823,6 +2613,8 @@ paths: schema: $ref: '#/definitions/schema.AnthropicResponse' summary: Generate a message response for the given messages and model. + tags: + - inference /v1/models: get: responses: @@ -1831,6 +2623,8 @@ paths: schema: $ref: '#/definitions/schema.ModelsDataResponse' summary: List and describe the various models available in the API. + tags: + - models /v1/rerank: post: parameters: @@ -1846,6 +2640,8 @@ paths: schema: $ref: '#/definitions/schema.JINARerankResponse' summary: Reranks a list of phrases by relevance to a given text query. + tags: + - rerank /v1/responses: post: parameters: @@ -1861,6 +2657,8 @@ paths: schema: $ref: '#/definitions/schema.ORResponseResource' summary: Create a response using the Open Responses API + tags: + - inference /v1/responses/{id}: get: description: Retrieve a response by ID. 
Can be used for polling background responses @@ -1895,6 +2693,8 @@ paths: additionalProperties: true type: object summary: Get a response by ID + tags: + - inference /v1/responses/{id}/cancel: post: description: Cancel a background response if it's still in progress @@ -1920,6 +2720,8 @@ paths: additionalProperties: true type: object summary: Cancel a response + tags: + - inference /v1/sound-generation: post: parameters: @@ -1935,6 +2737,8 @@ paths: schema: type: string summary: Generates audio from the input text. + tags: + - audio /v1/text-to-speech/{voice-id}: post: parameters: @@ -1955,6 +2759,8 @@ paths: schema: type: string summary: Generates audio from the input text. + tags: + - audio /v1/tokenMetrics: get: consumes: @@ -1967,6 +2773,8 @@ paths: schema: type: string summary: Get TokenMetrics for Active Slot. + tags: + - tokenize /v1/tokenize: post: parameters: @@ -1982,6 +2790,8 @@ paths: schema: $ref: '#/definitions/schema.TokenizeResponse' summary: Tokenize the input. + tags: + - tokenize /vad: post: consumes: @@ -1999,6 +2809,8 @@ paths: schema: $ref: '#/definitions/proto.VADResponse' summary: Detect voice fragments in an audio stream + tags: + - audio /video: post: parameters: @@ -2014,6 +2826,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: Creates a video given a prompt. + tags: + - video securityDefinitions: BearerAuth: in: header