diff --git a/core/config/meta/build.go b/core/config/meta/build.go new file mode 100644 index 000000000000..b45d7c569fcb --- /dev/null +++ b/core/config/meta/build.go @@ -0,0 +1,141 @@ +package meta + +import ( + "reflect" + "sort" + "sync" +) + +var ( + cachedMetadata *ConfigMetadata + cacheMu sync.RWMutex +) + +// BuildConfigMetadata reflects on the given struct type (ModelConfig), +// merges the enrichment registry, and returns the full ConfigMetadata. +// The result is cached in memory after the first call. +func BuildConfigMetadata(modelConfigType reflect.Type) *ConfigMetadata { + cacheMu.RLock() + if cachedMetadata != nil { + cacheMu.RUnlock() + return cachedMetadata + } + cacheMu.RUnlock() + + cacheMu.Lock() + defer cacheMu.Unlock() + + // Double-check after acquiring write lock + if cachedMetadata != nil { + return cachedMetadata + } + + cachedMetadata = buildConfigMetadataUncached(modelConfigType, DefaultRegistry()) + return cachedMetadata +} + +// buildConfigMetadataUncached does the actual work without caching. +// Exported via lowercase for testability through BuildForTest. 
+func buildConfigMetadataUncached(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata { + fields := WalkModelConfig(modelConfigType) + + // Apply registry overrides + for i := range fields { + override, ok := registry[fields[i].Path] + if !ok { + continue + } + applyOverride(&fields[i], override) + } + + // Sort fields by section order then by field order + sectionOrder := make(map[string]int) + for _, s := range DefaultSections() { + sectionOrder[s.ID] = s.Order + } + + sort.SliceStable(fields, func(i, j int) bool { + si := sectionOrder[fields[i].Section] + sj := sectionOrder[fields[j].Section] + if si != sj { + return si < sj + } + return fields[i].Order < fields[j].Order + }) + + // Collect sections that actually have fields + usedSections := make(map[string]bool) + for _, f := range fields { + usedSections[f.Section] = true + } + + var sections []Section + for _, s := range DefaultSections() { + if usedSections[s.ID] { + sections = append(sections, s) + } + } + + return &ConfigMetadata{ + Sections: sections, + Fields: fields, + } +} + +// applyOverride merges non-zero override values into the field. 
+func applyOverride(f *FieldMeta, o FieldMetaOverride) { + if o.Section != "" { + f.Section = o.Section + } + if o.Label != "" { + f.Label = o.Label + } + if o.Description != "" { + f.Description = o.Description + } + if o.Component != "" { + f.Component = o.Component + } + if o.Placeholder != "" { + f.Placeholder = o.Placeholder + } + if o.Default != nil { + f.Default = o.Default + } + if o.Min != nil { + f.Min = o.Min + } + if o.Max != nil { + f.Max = o.Max + } + if o.Step != nil { + f.Step = o.Step + } + if o.Options != nil { + f.Options = o.Options + } + if o.AutocompleteProvider != "" { + f.AutocompleteProvider = o.AutocompleteProvider + } + if o.VRAMImpact { + f.VRAMImpact = true + } + if o.Advanced { + f.Advanced = true + } + if o.Order != 0 { + f.Order = o.Order + } +} + +// BuildForTest builds metadata without caching, for use in tests. +func BuildForTest(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata { + return buildConfigMetadataUncached(modelConfigType, registry) +} + +// ResetCache clears the cached metadata (useful for testing). 
+func ResetCache() { + cacheMu.Lock() + defer cacheMu.Unlock() + cachedMetadata = nil +} diff --git a/core/config/meta/build_test.go b/core/config/meta/build_test.go new file mode 100644 index 000000000000..aa9acb889a8c --- /dev/null +++ b/core/config/meta/build_test.go @@ -0,0 +1,211 @@ +package meta_test + +import ( + "reflect" + "testing" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" +) + +func TestBuildConfigMetadata(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + if len(md.Sections) == 0 { + t.Fatal("expected sections, got 0") + } + if len(md.Fields) == 0 { + t.Fatal("expected fields, got 0") + } + + // Verify sections are ordered + for i := 1; i < len(md.Sections); i++ { + if md.Sections[i].Order < md.Sections[i-1].Order { + t.Errorf("sections not ordered: %s (order=%d) before %s (order=%d)", + md.Sections[i-1].ID, md.Sections[i-1].Order, + md.Sections[i].ID, md.Sections[i].Order) + } + } +} + +func TestRegistryOverrides(t *testing.T) { + registry := map[string]meta.FieldMetaOverride{ + "name": { + Label: "My Custom Label", + Description: "Custom description", + Component: "textarea", + Order: 999, + }, + } + + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), registry) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + f, ok := byPath["name"] + if !ok { + t.Fatal("field 'name' not found") + } + if f.Label != "My Custom Label" { + t.Errorf("expected label 'My Custom Label', got %q", f.Label) + } + if f.Description != "Custom description" { + t.Errorf("expected description 'Custom description', got %q", f.Description) + } + if f.Component != "textarea" { + t.Errorf("expected component 'textarea', got %q", f.Component) + } + if f.Order != 999 { + t.Errorf("expected order 999, got %d", f.Order) + } +} + +func TestUnregisteredFieldsGetDefaults(t *testing.T) { + // Use empty 
registry - all fields should still get auto-generated metadata + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), map[string]meta.FieldMetaOverride{}) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // context_size should still exist with auto-generated label + f, ok := byPath["context_size"] + if !ok { + t.Fatal("field 'context_size' not found") + } + if f.Label == "" { + t.Error("expected auto-generated label, got empty") + } + if f.UIType != "int" { + t.Errorf("expected UIType 'int', got %q", f.UIType) + } + if f.Component == "" { + t.Error("expected auto-generated component, got empty") + } +} + +func TestDefaultRegistryOverridesApply(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Verify enriched fields got their overrides + tests := []struct { + path string + label string + description string + vramImpact bool + }{ + {"context_size", "Context Size", "Maximum context window in tokens", true}, + {"gpu_layers", "GPU Layers", "Number of layers to offload to GPU (-1 = all)", true}, + {"backend", "Backend", "The inference backend to use (e.g. 
llama-cpp, vllm, diffusers)", false}, + {"parameters.temperature", "Temperature", "Sampling temperature (higher = more creative, lower = more deterministic)", false}, + {"template.chat", "Chat Template", "Go template for chat completion requests", false}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Label != tt.label { + t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label) + } + if f.Description != tt.description { + t.Errorf("field %q: expected description %q, got %q", tt.path, tt.description, f.Description) + } + if f.VRAMImpact != tt.vramImpact { + t.Errorf("field %q: expected vramImpact=%v, got %v", tt.path, tt.vramImpact, f.VRAMImpact) + } + } +} + +func TestStaticOptionsFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Fields with static options should have Options populated and no AutocompleteProvider + staticFields := []string{"quantization", "cache_type_k", "cache_type_v", "diffusers.pipeline_type", "diffusers.scheduler_type"} + for _, path := range staticFields { + f, ok := byPath[path] + if !ok { + t.Errorf("field %q not found", path) + continue + } + if len(f.Options) == 0 { + t.Errorf("field %q: expected Options to be populated", path) + } + if f.AutocompleteProvider != "" { + t.Errorf("field %q: expected no AutocompleteProvider, got %q", path, f.AutocompleteProvider) + } + } +} + +func TestDynamicProviderFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + byPath := make(map[string]meta.FieldMeta, len(md.Fields)) + for _, f := range md.Fields { + byPath[f.Path] = f + } + + // Fields with dynamic providers should have AutocompleteProvider and no Options + dynamicFields := map[string]string{ + 
"backend": meta.ProviderBackends, + "pipeline.llm": meta.ProviderModelsChat, + "pipeline.tts": meta.ProviderModelsTTS, + "pipeline.transcription": meta.ProviderModelsTranscript, + "pipeline.vad": meta.ProviderModelsVAD, + } + for path, expectedProvider := range dynamicFields { + f, ok := byPath[path] + if !ok { + t.Errorf("field %q not found", path) + continue + } + if f.AutocompleteProvider != expectedProvider { + t.Errorf("field %q: expected AutocompleteProvider %q, got %q", path, expectedProvider, f.AutocompleteProvider) + } + if len(f.Options) != 0 { + t.Errorf("field %q: expected no Options, got %d", path, len(f.Options)) + } + } +} + +func TestVRAMImpactFields(t *testing.T) { + md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry()) + + var vramFields []string + for _, f := range md.Fields { + if f.VRAMImpact { + vramFields = append(vramFields, f.Path) + } + } + + if len(vramFields) == 0 { + t.Error("expected some VRAM impact fields, got 0") + } + + // context_size and gpu_layers should be marked + expected := map[string]bool{"context_size": true, "gpu_layers": true} + for _, path := range vramFields { + if expected[path] { + delete(expected, path) + } + } + for path := range expected { + t.Errorf("expected VRAM impact field %q not found", path) + } +} diff --git a/core/config/meta/constants.go b/core/config/meta/constants.go new file mode 100644 index 000000000000..24e24015fb49 --- /dev/null +++ b/core/config/meta/constants.go @@ -0,0 +1,63 @@ +package meta + +// Dynamic autocomplete provider constants (runtime lookup required). +const ( + ProviderBackends = "backends" + ProviderModels = "models" + ProviderModelsChat = "models:chat" + ProviderModelsTTS = "models:tts" + ProviderModelsTranscript = "models:transcript" + ProviderModelsVAD = "models:vad" +) + +// Static option lists embedded directly in field metadata. 
// QuantizationOptions enumerates llama.cpp-style weight quantization
// formats offered for the "quantization" field.
var QuantizationOptions = []FieldOption{
	{Value: "q4_0", Label: "Q4_0"},
	{Value: "q4_1", Label: "Q4_1"},
	{Value: "q5_0", Label: "Q5_0"},
	{Value: "q5_1", Label: "Q5_1"},
	{Value: "q8_0", Label: "Q8_0"},
	{Value: "q2_K", Label: "Q2_K"},
	{Value: "q3_K_S", Label: "Q3_K_S"},
	{Value: "q3_K_M", Label: "Q3_K_M"},
	{Value: "q3_K_L", Label: "Q3_K_L"},
	{Value: "q4_K_S", Label: "Q4_K_S"},
	{Value: "q4_K_M", Label: "Q4_K_M"},
	{Value: "q5_K_S", Label: "Q5_K_S"},
	{Value: "q5_K_M", Label: "Q5_K_M"},
	{Value: "q6_K", Label: "Q6_K"},
}

// CacheTypeOptions enumerates KV-cache element types used by both the
// "cache_type_k" and "cache_type_v" fields.
var CacheTypeOptions = []FieldOption{
	{Value: "f16", Label: "F16"},
	{Value: "f32", Label: "F32"},
	{Value: "q8_0", Label: "Q8_0"},
	{Value: "q4_0", Label: "Q4_0"},
	{Value: "q4_1", Label: "Q4_1"},
	{Value: "q5_0", Label: "Q5_0"},
	{Value: "q5_1", Label: "Q5_1"},
}

// DiffusersPipelineOptions enumerates the supported diffusers pipeline
// class names for "diffusers.pipeline_type".
var DiffusersPipelineOptions = []FieldOption{
	{Value: "StableDiffusionPipeline", Label: "StableDiffusionPipeline"},
	{Value: "StableDiffusionImg2ImgPipeline", Label: "StableDiffusionImg2ImgPipeline"},
	{Value: "StableDiffusionXLPipeline", Label: "StableDiffusionXLPipeline"},
	{Value: "StableDiffusionXLImg2ImgPipeline", Label: "StableDiffusionXLImg2ImgPipeline"},
	{Value: "StableDiffusionDepth2ImgPipeline", Label: "StableDiffusionDepth2ImgPipeline"},
	{Value: "DiffusionPipeline", Label: "DiffusionPipeline"},
	{Value: "StableVideoDiffusionPipeline", Label: "StableVideoDiffusionPipeline"},
}

// DiffusersSchedulerOptions enumerates noise scheduler identifiers for
// "diffusers.scheduler_type".
var DiffusersSchedulerOptions = []FieldOption{
	{Value: "ddim", Label: "DDIM"},
	{Value: "ddpm", Label: "DDPM"},
	{Value: "pndm", Label: "PNDM"},
	{Value: "lms", Label: "LMS"},
	{Value: "euler", Label: "Euler"},
	{Value: "euler_a", Label: "Euler A"},
	{Value: "dpm_multistep", Label: "DPM Multistep"},
	{Value: "dpm_singlestep", Label: "DPM Singlestep"},
	{Value: "heun", Label: "Heun"},
	{Value: "unipc", Label: "UniPC"},
}
diff --git a/core/config/meta/reflect.go b/core/config/meta/reflect.go new file mode 100644 index 
000000000000..ef1d0b4b07ad --- /dev/null +++ b/core/config/meta/reflect.go @@ -0,0 +1,259 @@
package meta

import (
	"reflect"
	"strings"
	"unicode"
)

// WalkModelConfig uses reflection to discover all exported, YAML-tagged fields
// in the given struct type (expected to be config.ModelConfig) and returns a
// slice of FieldMeta with sensible defaults derived from the type information.
// A pointer type is dereferenced before walking.
func WalkModelConfig(t reflect.Type) []FieldMeta {
	if t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	var fields []FieldMeta
	walkStruct(t, "", "", &fields)
	return fields
}

// walkStruct recursively walks a struct type, collecting FieldMeta entries.
// prefix is the dot-path prefix for nested structs (e.g. "function.grammar.").
// parentYAMLPrefix is described as supporting inline embedding with a key
// prefix (e.g. "parameters.") — NOTE(review): it is currently only passed
// through recursive calls and never used to build a path; confirm whether it
// can be dropped.
func walkStruct(t reflect.Type, prefix, parentYAMLPrefix string, out *[]FieldMeta) {
	if t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	if t.Kind() != reflect.Struct {
		return
	}

	// Range-over-int (Go 1.22+): visit every declared struct field.
	for i := range t.NumField() {
		sf := t.Field(i)

		// Skip unexported fields — they cannot carry yaml values anyway.
		if !sf.IsExported() {
			continue
		}

		yamlTag := sf.Tag.Get("yaml")
		// yaml:"-" explicitly opts the field out.
		if yamlTag == "-" {
			continue
		}

		yamlKey, opts := parseTag(yamlTag)

		// Handle inline embedding (e.g. LLMConfig `yaml:",inline"`): the
		// nested struct's keys appear at the current level, so recurse
		// without extending the prefix.
		if opts.contains("inline") {
			ft := sf.Type
			if ft.Kind() == reflect.Pointer {
				ft = ft.Elem()
			}
			if ft.Kind() == reflect.Struct {
				walkStruct(ft, prefix, parentYAMLPrefix, out)
			}
			continue
		}

		// If no yaml key and it's an embedded struct without inline, skip unknown pattern
		if yamlKey == "" {
			ft := sf.Type
			if ft.Kind() == reflect.Pointer {
				ft = ft.Elem()
			}
			// Anonymous struct without yaml tag - treat as inline
			if sf.Anonymous && ft.Kind() == reflect.Struct {
				walkStruct(ft, prefix, parentYAMLPrefix, out)
				continue
			}
			// Named field without yaml tag - skip
			continue
		}

		ft := sf.Type
		isPtr := ft.Kind() == reflect.Pointer
		if isPtr {
			ft = ft.Elem()
		}

		// Named nested struct (not a special leaf type) -> recurse with a
		// dotted prefix, e.g. "diffusers." + "pipeline_type".
		if ft.Kind() == reflect.Struct && !isSpecialType(ft) {
			nestedPrefix := prefix + yamlKey + "."
			walkStruct(ft, nestedPrefix, "", out)
			continue
		}

		// Leaf field: record metadata with type-inferred defaults.
		path := prefix + yamlKey
		goType := sf.Type.String()
		uiType, component := inferUIType(sf.Type)
		section := inferSection(prefix)
		label := labelFromKey(yamlKey)

		*out = append(*out, FieldMeta{
			Path:      path,
			YAMLKey:   yamlKey,
			GoType:    goType,
			UIType:    uiType,
			Pointer:   isPtr,
			Section:   section,
			Label:     label,
			Component: component,
			Order:     len(*out), // discovery order; registry overrides may replace it
		})
	}
}

// isSpecialType returns true for struct types that should be treated as leaf
// values rather than recursed into (e.g. custom JSON marshalers).
func isSpecialType(t reflect.Type) bool {
	if t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	name := t.Name()
	// LogprobsValue, URI types are leaf values despite being structs
	switch name {
	case "LogprobsValue", "URI":
		return true
	}
	return false
}

// inferUIType maps a Go reflect.Type to a UI type string and default component.
+func inferUIType(t reflect.Type) (uiType, component string) { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + + switch t.Kind() { + case reflect.Bool: + return "bool", "toggle" + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return "int", "number" + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + return "int", "number" + case reflect.Float32, reflect.Float64: + return "float", "number" + case reflect.String: + return "string", "input" + case reflect.Slice: + elem := t.Elem() + if elem.Kind() == reflect.String { + return "[]string", "string-list" + } + if elem.Kind() == reflect.Pointer { + elem = elem.Elem() + } + if elem.Kind() == reflect.Struct { + return "[]object", "json-editor" + } + return "[]any", "json-editor" + case reflect.Map: + return "map", "map-editor" + case reflect.Struct: + // Special types treated as leaves + if isSpecialType(t) { + return "bool", "toggle" // LogprobsValue + } + return "object", "json-editor" + default: + return "any", "input" + } +} + +// inferSection determines the config section from the dot-path prefix. +func inferSection(prefix string) string { + if prefix == "" { + return "general" + } + // Remove trailing dot + p := strings.TrimSuffix(prefix, ".") + + // Use the top-level prefix to determine section + parts := strings.SplitN(p, ".", 2) + top := parts[0] + + switch top { + case "parameters": + return "parameters" + case "template": + return "templates" + case "function": + return "functions" + case "reasoning": + return "reasoning" + case "diffusers": + return "diffusers" + case "tts": + return "tts" + case "pipeline": + return "pipeline" + case "grpc": + return "grpc" + case "agent": + return "agent" + case "mcp": + return "mcp" + case "feature_flags": + return "other" + case "limit_mm_per_prompt": + return "llm" + default: + return "other" + } +} + +// labelFromKey converts a yaml key like "context_size" to "Context Size". 
+func labelFromKey(key string) string { + parts := strings.Split(key, "_") + for i, p := range parts { + if len(p) > 0 { + runes := []rune(p) + runes[0] = unicode.ToUpper(runes[0]) + parts[i] = string(runes) + } + } + return strings.Join(parts, " ") +} + +// tagOptions is a set of comma-separated yaml tag options. +type tagOptions string + +func (o tagOptions) contains(optName string) bool { + s := string(o) + for s != "" { + var name string + if name, s, _ = strings.Cut(s, ","); name == optName { + return true + } + } + return false +} + +// parseTag splits a yaml struct tag into the key name and options. +func parseTag(tag string) (string, tagOptions) { + if tag == "" { + return "", "" + } + before, after, found := strings.Cut(tag, ",") + if found { + return before, tagOptions(after) + } + return tag, "" +} + +// SectionForPath returns the section ID for a given dot-path. +// Exported so tests and the registry can use it. +func SectionForPath(path string) string { + before, _, found := strings.Cut(path, ".") + if !found { + return "general" + } + return inferSection(before + ".") +} + +// GoTypeName returns a human-readable Go type string for display. 
+func GoTypeName(t reflect.Type) string { + return t.String() +} diff --git a/core/config/meta/reflect_test.go b/core/config/meta/reflect_test.go new file mode 100644 index 000000000000..408bb2a1ecc3 --- /dev/null +++ b/core/config/meta/reflect_test.go @@ -0,0 +1,208 @@ +package meta_test + +import ( + "reflect" + "testing" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/config/meta" +) + +func TestWalkModelConfig(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + if len(fields) == 0 { + t.Fatal("expected fields from ModelConfig, got 0") + } + + // Build a lookup by path + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + // Verify some top-level fields exist + for _, path := range []string{"name", "backend", "cuda", "step"} { + if _, ok := byPath[path]; !ok { + t.Errorf("expected field %q not found", path) + } + } + + // Verify inline LLMConfig fields appear at top level (no prefix) + for _, path := range []string{"context_size", "gpu_layers", "threads", "mmap"} { + if _, ok := byPath[path]; !ok { + t.Errorf("expected inline LLMConfig field %q not found", path) + } + } + + // Verify nested struct fields have correct prefix + for _, path := range []string{ + "template.chat", + "template.completion", + "template.use_tokenizer_template", + "function.grammar.parallel_calls", + "function.grammar.mixed_mode", + "diffusers.pipeline_type", + "diffusers.cuda", + "pipeline.llm", + "pipeline.tts", + "reasoning.disable", + "agent.max_iterations", + "grpc.attempts", + } { + if _, ok := byPath[path]; !ok { + t.Errorf("expected nested field %q not found", path) + } + } + + // Verify PredictionOptions fields have parameters. 
prefix + for _, path := range []string{ + "parameters.temperature", + "parameters.top_p", + "parameters.top_k", + "parameters.max_tokens", + "parameters.seed", + } { + if _, ok := byPath[path]; !ok { + t.Errorf("expected parameters field %q not found", path) + } + } + + // Verify TTSConfig fields have tts. prefix + if _, ok := byPath["tts.voice"]; !ok { + t.Error("expected tts.voice field not found") + } +} + +func TestSkipsYAMLDashFields(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + // modelConfigFile has yaml:"-" tag, should be skipped + for _, f := range fields { + if f.Path == "modelConfigFile" || f.Path == "modelTemplate" { + t.Errorf("field %q should have been skipped (yaml:\"-\")", f.Path) + } + } +} + +func TestTypeMapping(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + uiType string + pointer bool + }{ + {"name", "string", false}, + {"cuda", "bool", false}, + {"context_size", "int", true}, + {"gpu_layers", "int", true}, + {"threads", "int", true}, + {"f16", "bool", true}, + {"mmap", "bool", true}, + {"stopwords", "[]string", false}, + {"roles", "map", false}, + {"parameters.temperature", "float", true}, + {"parameters.top_k", "int", true}, + {"function.grammar.parallel_calls", "bool", false}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.UIType != tt.uiType { + t.Errorf("field %q: expected UIType %q, got %q", tt.path, tt.uiType, f.UIType) + } + if f.Pointer != tt.pointer { + t.Errorf("field %q: expected Pointer=%v, got %v", tt.path, tt.pointer, f.Pointer) + } + } +} + +func TestSectionAssignment(t *testing.T) { + fields := 
meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + section string + }{ + {"name", "general"}, + {"backend", "general"}, + {"context_size", "general"}, // inline LLMConfig -> no prefix -> general + {"parameters.temperature", "parameters"}, + {"template.chat", "templates"}, + {"function.grammar.parallel_calls", "functions"}, + {"diffusers.cuda", "diffusers"}, + {"pipeline.llm", "pipeline"}, + {"reasoning.disable", "reasoning"}, + {"agent.max_iterations", "agent"}, + {"grpc.attempts", "grpc"}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Section != tt.section { + t.Errorf("field %q: expected section %q, got %q", tt.path, tt.section, f.Section) + } + } +} + +func TestLabelGeneration(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + + byPath := make(map[string]meta.FieldMeta, len(fields)) + for _, f := range fields { + byPath[f.Path] = f + } + + tests := []struct { + path string + label string + }{ + {"context_size", "Context Size"}, + {"gpu_layers", "Gpu Layers"}, + {"name", "Name"}, + {"cuda", "Cuda"}, + } + + for _, tt := range tests { + f, ok := byPath[tt.path] + if !ok { + t.Errorf("field %q not found", tt.path) + continue + } + if f.Label != tt.label { + t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label) + } + } +} + +func TestFieldCount(t *testing.T) { + fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{})) + // We expect a large number of fields (100+) given the config complexity + if len(fields) < 80 { + t.Errorf("expected at least 80 fields, got %d", len(fields)) + } + t.Logf("Total fields discovered: %d", len(fields)) +} diff --git a/core/config/meta/registry.go b/core/config/meta/registry.go new file mode 100644 index 
000000000000..bebba468dc2d --- /dev/null +++ b/core/config/meta/registry.go @@ -0,0 +1,314 @@ +package meta + +// DefaultRegistry returns enrichment overrides for the ~30 most commonly used +// config fields. Fields not listed here still appear with auto-generated +// labels and type-inferred components. +func DefaultRegistry() map[string]FieldMetaOverride { + f64 := func(v float64) *float64 { return &v } + + return map[string]FieldMetaOverride{ + // --- General --- + "name": { + Section: "general", + Label: "Model Name", + Description: "Unique identifier for this model configuration", + Component: "input", + Order: 0, + }, + "backend": { + Section: "general", + Label: "Backend", + Description: "The inference backend to use (e.g. llama-cpp, vllm, diffusers)", + Component: "select", + AutocompleteProvider: ProviderBackends, + Order: 1, + }, + "description": { + Section: "general", + Label: "Description", + Description: "Human-readable description of what this model does", + Component: "textarea", + Order: 2, + }, + "usage": { + Section: "general", + Label: "Usage", + Description: "Usage instructions or notes", + Component: "textarea", + Advanced: true, + Order: 3, + }, + "cuda": { + Section: "general", + Label: "CUDA", + Description: "Explicitly enable CUDA acceleration", + Order: 5, + }, + "known_usecases": { + Section: "general", + Label: "Known Use Cases", + Description: "Capabilities this model supports (e.g. 
FLAG_CHAT, FLAG_COMPLETION)", + Component: "string-list", + Order: 6, + }, + + // --- LLM --- + "context_size": { + Section: "llm", + Label: "Context Size", + Description: "Maximum context window in tokens", + Component: "number", + VRAMImpact: true, + Order: 10, + }, + "gpu_layers": { + Section: "llm", + Label: "GPU Layers", + Description: "Number of layers to offload to GPU (-1 = all)", + Component: "number", + Min: f64(-1), + VRAMImpact: true, + Order: 11, + }, + "threads": { + Section: "llm", + Label: "Threads", + Description: "Number of CPU threads for inference", + Component: "number", + Min: f64(1), + Order: 12, + }, + "f16": { + Section: "llm", + Label: "F16", + Description: "Use 16-bit floating point for key/value cache", + Order: 13, + }, + "mmap": { + Section: "llm", + Label: "Memory Map", + Description: "Use memory-mapped files for model loading", + Order: 14, + }, + "mmlock": { + Section: "llm", + Label: "Memory Lock", + Description: "Lock model memory to prevent swapping", + Advanced: true, + Order: 15, + }, + "low_vram": { + Section: "llm", + Label: "Low VRAM", + Description: "Optimize for systems with limited GPU memory", + VRAMImpact: true, + Order: 16, + }, + "embeddings": { + Section: "llm", + Label: "Embeddings", + Description: "Enable embedding generation mode", + Order: 17, + }, + "quantization": { + Section: "llm", + Label: "Quantization", + Description: "Quantization method (e.g. q4_0, q5_1, q8_0)", + Component: "select", + Options: QuantizationOptions, + Advanced: true, + Order: 20, + }, + "flash_attention": { + Section: "llm", + Label: "Flash Attention", + Description: "Enable flash attention for faster inference", + Component: "input", + Advanced: true, + Order: 21, + }, + "cache_type_k": { + Section: "llm", + Label: "KV Cache Type (K)", + Description: "Quantization type for key cache (e.g. 
f16, q8_0, q4_0)", + Component: "select", + Options: CacheTypeOptions, + VRAMImpact: true, + Advanced: true, + Order: 22, + }, + "cache_type_v": { + Section: "llm", + Label: "KV Cache Type (V)", + Description: "Quantization type for value cache", + Component: "select", + Options: CacheTypeOptions, + VRAMImpact: true, + Advanced: true, + Order: 23, + }, + + // --- Parameters --- + "parameters.temperature": { + Section: "parameters", + Label: "Temperature", + Description: "Sampling temperature (higher = more creative, lower = more deterministic)", + Component: "slider", + Min: f64(0), + Max: f64(2), + Step: f64(0.05), + Order: 30, + }, + "parameters.top_p": { + Section: "parameters", + Label: "Top P", + Description: "Nucleus sampling threshold", + Component: "slider", + Min: f64(0), + Max: f64(1), + Step: f64(0.01), + Order: 31, + }, + "parameters.top_k": { + Section: "parameters", + Label: "Top K", + Description: "Top-K sampling: consider only the K most likely tokens", + Component: "number", + Min: f64(0), + Order: 32, + }, + "parameters.max_tokens": { + Section: "parameters", + Label: "Max Tokens", + Description: "Maximum number of tokens to generate (0 = unlimited)", + Component: "number", + Min: f64(0), + Order: 33, + }, + "parameters.repeat_penalty": { + Section: "parameters", + Label: "Repeat Penalty", + Description: "Penalize repeated tokens (1.0 = no penalty)", + Component: "number", + Min: f64(0), + Advanced: true, + Order: 34, + }, + "parameters.seed": { + Section: "parameters", + Label: "Seed", + Description: "Random seed (-1 = random)", + Component: "number", + Advanced: true, + Order: 35, + }, + + // --- Templates --- + "template.chat": { + Section: "templates", + Label: "Chat Template", + Description: "Go template for chat completion requests", + Component: "code-editor", + Order: 40, + }, + "template.chat_message": { + Section: "templates", + Label: "Chat Message Template", + Description: "Go template for individual chat messages", + Component: 
"code-editor", + Order: 41, + }, + "template.completion": { + Section: "templates", + Label: "Completion Template", + Description: "Go template for completion requests", + Component: "code-editor", + Order: 42, + }, + "template.use_tokenizer_template": { + Section: "templates", + Label: "Use Tokenizer Template", + Description: "Use the chat template from the model's tokenizer config", + Order: 43, + }, + + // --- Pipeline --- + "pipeline.llm": { + Section: "pipeline", + Label: "LLM Model", + Description: "Model to use for LLM inference in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsChat, + Order: 60, + }, + "pipeline.tts": { + Section: "pipeline", + Label: "TTS Model", + Description: "Model to use for text-to-speech in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsTTS, + Order: 61, + }, + "pipeline.transcription": { + Section: "pipeline", + Label: "Transcription Model", + Description: "Model to use for speech-to-text in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsTranscript, + Order: 62, + }, + "pipeline.vad": { + Section: "pipeline", + Label: "VAD Model", + Description: "Model to use for voice activity detection in the pipeline", + Component: "model-select", + AutocompleteProvider: ProviderModelsVAD, + Order: 63, + }, + + // --- Functions --- + "function.grammar.parallel_calls": { + Section: "functions", + Label: "Parallel Calls", + Description: "Allow the LLM to return multiple function calls in one response", + Order: 70, + }, + "function.grammar.mixed_mode": { + Section: "functions", + Label: "Mixed Mode", + Description: "Allow the LLM to return both text and function calls", + Order: 71, + }, + "function.grammar.disable": { + Section: "functions", + Label: "Disable Grammar", + Description: "Disable grammar-constrained generation for function calls", + Advanced: true, + Order: 72, + }, + + // --- Diffusers --- + "diffusers.pipeline_type": { + Section: 
"diffusers", + Label: "Pipeline Type", + Description: "Diffusers pipeline type (e.g. StableDiffusionPipeline)", + Component: "select", + Options: DiffusersPipelineOptions, + Order: 80, + }, + "diffusers.scheduler_type": { + Section: "diffusers", + Label: "Scheduler Type", + Description: "Noise scheduler type", + Component: "select", + Options: DiffusersSchedulerOptions, + Order: 81, + }, + "diffusers.cuda": { + Section: "diffusers", + Label: "CUDA", + Description: "Enable CUDA for diffusers", + Order: 82, + }, + } +} diff --git a/core/config/meta/types.go b/core/config/meta/types.go new file mode 100644 index 000000000000..dcd21fb55806 --- /dev/null +++ b/core/config/meta/types.go @@ -0,0 +1,83 @@ +package meta + +// FieldMeta describes a single configuration field for UI rendering and agent discovery. +type FieldMeta struct { + Path string `json:"path"` // dot-path: "context_size", "function.grammar.parallel_calls" + YAMLKey string `json:"yaml_key"` // leaf yaml key + GoType string `json:"go_type"` // "*int", "string", "[]string" + UIType string `json:"ui_type"` // "string", "int", "float", "bool", "[]string", "map", "object" + Pointer bool `json:"pointer,omitempty"` // true = nil means "not set" + Section string `json:"section"` // "general", "llm", "templates", etc. + Label string `json:"label"` // human-readable label + Description string `json:"description,omitempty"` // help text + Component string `json:"component"` // "input", "number", "toggle", "select", "slider", etc. + Placeholder string `json:"placeholder,omitempty"` + Default any `json:"default,omitempty"` + Min *float64 `json:"min,omitempty"` + Max *float64 `json:"max,omitempty"` + Step *float64 `json:"step,omitempty"` + Options []FieldOption `json:"options,omitempty"` + + AutocompleteProvider string `json:"autocomplete_provider,omitempty"` // "backends", "models:chat", etc. 
+ VRAMImpact bool `json:"vram_impact,omitempty"` + Advanced bool `json:"advanced,omitempty"` + Order int `json:"order"` +} + +// FieldOption represents a choice in a select/enum field. +type FieldOption struct { + Value string `json:"value"` + Label string `json:"label"` +} + +// Section groups related fields in the UI. +type Section struct { + ID string `json:"id"` + Label string `json:"label"` + Icon string `json:"icon,omitempty"` + Order int `json:"order"` +} + +// ConfigMetadata is the top-level response for the metadata API. +type ConfigMetadata struct { + Sections []Section `json:"sections"` + Fields []FieldMeta `json:"fields"` +} + +// FieldMetaOverride holds registry overrides that are merged on top of +// the reflection-discovered defaults. Only non-zero fields override. +type FieldMetaOverride struct { + Section string + Label string + Description string + Component string + Placeholder string + Default any + Min *float64 + Max *float64 + Step *float64 + Options []FieldOption + AutocompleteProvider string + VRAMImpact bool + Advanced bool + Order int +} + +// DefaultSections defines the well-known config sections in display order. 
+func DefaultSections() []Section { + return []Section{ + {ID: "general", Label: "General", Icon: "settings", Order: 0}, + {ID: "llm", Label: "LLM", Icon: "cpu", Order: 10}, + {ID: "parameters", Label: "Parameters", Icon: "sliders", Order: 20}, + {ID: "templates", Label: "Templates", Icon: "file-text", Order: 30}, + {ID: "functions", Label: "Functions / Tools", Icon: "tool", Order: 40}, + {ID: "reasoning", Label: "Reasoning", Icon: "brain", Order: 45}, + {ID: "diffusers", Label: "Diffusers", Icon: "image", Order: 50}, + {ID: "tts", Label: "TTS", Icon: "volume-2", Order: 55}, + {ID: "pipeline", Label: "Pipeline", Icon: "git-merge", Order: 60}, + {ID: "grpc", Label: "gRPC", Icon: "server", Order: 65}, + {ID: "agent", Label: "Agent", Icon: "bot", Order: 70}, + {ID: "mcp", Label: "MCP", Icon: "plug", Order: 75}, + {ID: "other", Label: "Other", Icon: "more-horizontal", Order: 100}, + } +} diff --git a/core/http/app.go b/core/http/app.go index 94f36c89ddfb..bfedfc34dce8 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -50,6 +50,36 @@ var quietPaths = []string{"/api/operations", "/api/resources", "/healthz", "/rea // @securityDefinitions.apikey BearerAuth // @in header // @name Authorization +// @tag.name inference +// @tag.description Chat completions, text completions, edits, and responses (OpenAI-compatible) +// @tag.name embeddings +// @tag.description Vector embeddings (OpenAI-compatible) +// @tag.name audio +// @tag.description Text-to-speech, transcription, voice activity detection, sound generation +// @tag.name images +// @tag.description Image generation and inpainting +// @tag.name video +// @tag.description Video generation from prompts +// @tag.name detection +// @tag.description Object detection in images +// @tag.name tokenize +// @tag.description Tokenization and token metrics +// @tag.name models +// @tag.description Model gallery browsing, installation, deletion, and listing +// @tag.name backends +// @tag.description Backend gallery browsing, 
installation, deletion, and listing +// @tag.name config +// @tag.description Model configuration metadata, autocomplete, PATCH updates, VRAM estimation +// @tag.name monitoring +// @tag.description Prometheus metrics, backend status, system information +// @tag.name mcp +// @tag.description Model Context Protocol — tool-augmented chat with MCP servers +// @tag.name agent-jobs +// @tag.description Agent task and job management +// @tag.name p2p +// @tag.description Peer-to-peer networking nodes and tokens +// @tag.name rerank +// @tag.description Document reranking func API(application *application.Application) (*echo.Echo, error) { e := echo.New() diff --git a/core/http/endpoints/anthropic/messages.go b/core/http/endpoints/anthropic/messages.go index adb4b989f5b1..f7a65c93e110 100644 --- a/core/http/endpoints/anthropic/messages.go +++ b/core/http/endpoints/anthropic/messages.go @@ -22,6 +22,7 @@ import ( // MessagesEndpoint is the Anthropic Messages API endpoint // https://docs.anthropic.com/claude/reference/messages_post // @Summary Generate a message response for the given messages and model. +// @Tags inference // @Param request body schema.AnthropicRequest true "query params" // @Success 200 {object} schema.AnthropicResponse "Response" // @Router /v1/messages [post] diff --git a/core/http/endpoints/elevenlabs/soundgeneration.go b/core/http/endpoints/elevenlabs/soundgeneration.go index fa1803649bc8..7034ea042338 100644 --- a/core/http/endpoints/elevenlabs/soundgeneration.go +++ b/core/http/endpoints/elevenlabs/soundgeneration.go @@ -15,6 +15,7 @@ import ( // SoundGenerationEndpoint is the ElevenLabs SoundGeneration endpoint https://elevenlabs.io/docs/api-reference/sound-generation // @Summary Generates audio from the input text. 
+// @Tags audio // @Param request body schema.ElevenLabsSoundGenerationRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/sound-generation [post] diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index ff859b04d340..3fc8c8f07602 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -15,6 +15,7 @@ import ( // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech // @Summary Generates audio from the input text. +// @Tags audio // @Param voice-id path string true "Account ID" // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" diff --git a/core/http/endpoints/jina/rerank.go b/core/http/endpoints/jina/rerank.go index 330fb94a4396..6dabd35f6336 100644 --- a/core/http/endpoints/jina/rerank.go +++ b/core/http/endpoints/jina/rerank.go @@ -15,6 +15,7 @@ import ( // JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/) // @Summary Reranks a list of phrases by relevance to a given text query. +// @Tags rerank // @Param request body schema.JINARerankRequest true "query params" // @Success 200 {object} schema.JINARerankResponse "Response" // @Router /v1/rerank [post] diff --git a/core/http/endpoints/localai/agent_jobs.go b/core/http/endpoints/localai/agent_jobs.go index 8ed20d7df446..292a01a0c58c 100644 --- a/core/http/endpoints/localai/agent_jobs.go +++ b/core/http/endpoints/localai/agent_jobs.go @@ -29,6 +29,15 @@ func getJobService(app *application.Application, c echo.Context) *services.Agent return jobSvc } +// CreateTaskEndpoint creates a new agent task definition. 
+// @Summary Create a new agent task +// @Tags agent-jobs +// @Accept json +// @Produce json +// @Param request body schema.Task true "Task definition" +// @Success 201 {object} map[string]string "id" +// @Failure 400 {object} map[string]string "error" +// @Router /api/agent/tasks [post] func CreateTaskEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { var task schema.Task @@ -45,6 +54,17 @@ func CreateTaskEndpoint(app *application.Application) echo.HandlerFunc { } } +// UpdateTaskEndpoint updates an existing agent task. +// @Summary Update an agent task +// @Tags agent-jobs +// @Accept json +// @Produce json +// @Param id path string true "Task ID" +// @Param request body schema.Task true "Updated task definition" +// @Success 200 {object} map[string]string "message" +// @Failure 400 {object} map[string]string "error" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/tasks/{id} [put] func UpdateTaskEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -64,6 +84,14 @@ func UpdateTaskEndpoint(app *application.Application) echo.HandlerFunc { } } +// DeleteTaskEndpoint deletes an agent task. +// @Summary Delete an agent task +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Task ID" +// @Success 200 {object} map[string]string "message" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/tasks/{id} [delete] func DeleteTaskEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -78,6 +106,13 @@ func DeleteTaskEndpoint(app *application.Application) echo.HandlerFunc { } } +// ListTasksEndpoint lists all agent tasks for the current user. 
+// @Summary List agent tasks +// @Tags agent-jobs +// @Produce json +// @Param all_users query string false "Set to 'true' for admin cross-user listing" +// @Success 200 {object} []schema.Task "tasks" +// @Router /api/agent/tasks [get] func ListTasksEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { jobSvc := getJobService(app, c) @@ -120,6 +155,14 @@ func ListTasksEndpoint(app *application.Application) echo.HandlerFunc { } } +// GetTaskEndpoint returns a single agent task by ID. +// @Summary Get an agent task +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Task ID" +// @Success 200 {object} schema.Task "task" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/tasks/{id} [get] func GetTaskEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -132,6 +175,15 @@ func GetTaskEndpoint(app *application.Application) echo.HandlerFunc { } } +// ExecuteJobEndpoint creates and runs a new job for a task. +// @Summary Execute an agent job +// @Tags agent-jobs +// @Accept json +// @Produce json +// @Param request body schema.JobExecutionRequest true "Job execution request" +// @Success 201 {object} schema.JobExecutionResponse "job created" +// @Failure 400 {object} map[string]string "error" +// @Router /api/agent/jobs/execute [post] func ExecuteJobEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { var req schema.JobExecutionRequest @@ -167,6 +219,14 @@ func ExecuteJobEndpoint(app *application.Application) echo.HandlerFunc { } } +// GetJobEndpoint returns a single job by ID. 
+// @Summary Get an agent job +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Job ID" +// @Success 200 {object} schema.Job "job" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/jobs/{id} [get] func GetJobEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -179,6 +239,16 @@ func GetJobEndpoint(app *application.Application) echo.HandlerFunc { } } +// ListJobsEndpoint lists jobs, optionally filtered by task or status. +// @Summary List agent jobs +// @Tags agent-jobs +// @Produce json +// @Param task_id query string false "Filter by task ID" +// @Param status query string false "Filter by status (pending, running, completed, failed, cancelled)" +// @Param limit query integer false "Max number of jobs to return" +// @Param all_users query string false "Set to 'true' for admin cross-user listing" +// @Success 200 {object} []schema.Job "jobs" +// @Router /api/agent/jobs [get] func ListJobsEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { var taskID *string @@ -240,6 +310,15 @@ func ListJobsEndpoint(app *application.Application) echo.HandlerFunc { } } +// CancelJobEndpoint cancels a running job. +// @Summary Cancel an agent job +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Job ID" +// @Success 200 {object} map[string]string "message" +// @Failure 400 {object} map[string]string "error" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/jobs/{id}/cancel [post] func CancelJobEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -254,6 +333,14 @@ func CancelJobEndpoint(app *application.Application) echo.HandlerFunc { } } +// DeleteJobEndpoint deletes a job by ID. 
+// @Summary Delete an agent job +// @Tags agent-jobs +// @Produce json +// @Param id path string true "Job ID" +// @Success 200 {object} map[string]string "message" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/jobs/{id} [delete] func DeleteJobEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { id := c.Param("id") @@ -268,6 +355,17 @@ func DeleteJobEndpoint(app *application.Application) echo.HandlerFunc { } } +// ExecuteTaskByNameEndpoint looks up a task by name and executes it. +// @Summary Execute an agent task by name +// @Tags agent-jobs +// @Accept json +// @Produce json +// @Param name path string true "Task name" +// @Param parameters body object false "Optional template parameters" +// @Success 201 {object} schema.JobExecutionResponse "job created" +// @Failure 400 {object} map[string]string "error" +// @Failure 404 {object} map[string]string "error" +// @Router /api/agent/tasks/{name}/execute [post] func ExecuteTaskByNameEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { name := c.Param("name") diff --git a/core/http/endpoints/localai/api_skills.go b/core/http/endpoints/localai/api_skills.go new file mode 100644 index 000000000000..44874a8c41c2 --- /dev/null +++ b/core/http/endpoints/localai/api_skills.go @@ -0,0 +1,473 @@ +package localai + +import ( + "encoding/json" + "fmt" + "net/http" + "sort" + "strings" + "sync" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/swagger" +) + +// skillDef is a lightweight skill definition that maps to swagger tags. 
type skillDef struct {
	Name        string   `json:"name"`        // stable identifier, also the URL path segment
	Description string   `json:"description"` // one-line summary shown in the skill index
	Tags        []string `json:"tags"`        // swagger tags whose operations belong to this skill
	Intro       string   `json:"-"`           // brief context not in swagger
}

// skillDefs is the fixed catalogue of API skills. Order matters: it is the
// order in which the listing endpoint returns the skills.
var skillDefs = []skillDef{
	{
		Name:        "chat-inference",
		Description: "OpenAI-compatible chat completions, text completions, and embeddings",
		Tags:        []string{"inference", "embeddings"},
		Intro:       "Set \"stream\": true for SSE streaming. Supports tool/function calling when the model config has function templates configured.",
	},
	{
		Name:        "audio",
		Description: "Text-to-speech, voice activity detection, transcription, and sound generation",
		Tags:        []string{"audio"},
	},
	{
		Name:        "images",
		Description: "Image generation and inpainting",
		Tags:        []string{"images"},
	},
	{
		Name:        "model-management",
		Description: "Browse the gallery, install, delete, and manage models and backends",
		Tags:        []string{"models", "backends"},
	},
	{
		Name:        "config-management",
		Description: "Discover, read, and modify model configuration fields with VRAM estimation",
		Tags:        []string{"config"},
		Intro:       "Fields with static options include an \"options\" array in metadata. Fields with dynamic values have an \"autocomplete_provider\" for runtime lookup.",
	},
	{
		Name:        "monitoring",
		Description: "System metrics, backend status, and system information",
		Tags:        []string{"monitoring"},
	},
	{
		Name:        "mcp",
		Description: "Model Context Protocol — tool-augmented chat with MCP servers",
		Tags:        []string{"mcp"},
		Intro:       "The model's config must define MCP servers. The endpoint handles tool execution automatically.",
	},
	{
		Name:        "agents",
		Description: "Agent task and job management for CI/automation workflows",
		Tags:        []string{"agent-jobs"},
	},
	{
		Name:        "video",
		Description: "Video generation from text prompts",
		Tags:        []string{"video"},
	},
}

// swaggerState holds parsed swagger spec data, initialised once.
+type swaggerState struct { + once sync.Once + spec map[string]any // full parsed swagger JSON + ready bool +} + +var swState swaggerState + +func (s *swaggerState) init() { + s.once.Do(func() { + var spec map[string]any + if err := json.Unmarshal(swagger.SwaggerJSON, &spec); err != nil { + return + } + s.spec = spec + s.ready = true + }) +} + +// filterSwaggerByTags returns a swagger fragment containing only paths whose +// operations carry at least one of the given tags, plus the definitions they +// reference. +func filterSwaggerByTags(spec map[string]any, tags []string) map[string]any { + tagSet := make(map[string]bool, len(tags)) + for _, t := range tags { + tagSet[t] = true + } + + paths, _ := spec["paths"].(map[string]any) + allDefs, _ := spec["definitions"].(map[string]any) + + filteredPaths := make(map[string]any) + for path, methods := range paths { + methodMap, ok := methods.(map[string]any) + if !ok { + continue + } + filteredMethods := make(map[string]any) + for method, opRaw := range methodMap { + op, ok := opRaw.(map[string]any) + if !ok { + continue + } + opTags, _ := op["tags"].([]any) + for _, t := range opTags { + if ts, ok := t.(string); ok && tagSet[ts] { + filteredMethods[method] = op + break + } + } + } + if len(filteredMethods) > 0 { + filteredPaths[path] = filteredMethods + } + } + + // Collect all $ref definitions used by the filtered paths. + neededDefs := make(map[string]bool) + collectRefs(filteredPaths, neededDefs) + + // Resolve nested refs from definitions themselves. 
+ changed := true + for changed { + changed = false + for name := range neededDefs { + if def, ok := allDefs[name]; ok { + before := len(neededDefs) + collectRefs(def, neededDefs) + if len(neededDefs) > before { + changed = true + } + } + } + } + + filteredDefs := make(map[string]any) + for name := range neededDefs { + if def, ok := allDefs[name]; ok { + filteredDefs[name] = def + } + } + + result := map[string]any{ + "paths": filteredPaths, + } + if len(filteredDefs) > 0 { + result["definitions"] = filteredDefs + } + return result +} + +// collectRefs walks a JSON structure and collects all $ref definition names. +func collectRefs(v any, refs map[string]bool) { + switch val := v.(type) { + case map[string]any: + if ref, ok := val["$ref"].(string); ok { + // "#/definitions/schema.OpenAIRequest" -> "schema.OpenAIRequest" + const prefix = "#/definitions/" + if strings.HasPrefix(ref, prefix) { + refs[ref[len(prefix):]] = true + } + } + for _, child := range val { + collectRefs(child, refs) + } + case []any: + for _, child := range val { + collectRefs(child, refs) + } + } +} + +// swaggerToMarkdown renders a filtered swagger fragment into concise markdown. +func swaggerToMarkdown(skillName, intro string, fragment map[string]any) string { + var b strings.Builder + b.WriteString("# ") + b.WriteString(skillName) + b.WriteString("\n") + if intro != "" { + b.WriteString("\n") + b.WriteString(intro) + b.WriteString("\n") + } + + paths, _ := fragment["paths"].(map[string]any) + defs, _ := fragment["definitions"].(map[string]any) + + // Sort paths for stable output. 
+ sortedPaths := make([]string, 0, len(paths)) + for p := range paths { + sortedPaths = append(sortedPaths, p) + } + sort.Strings(sortedPaths) + + for _, path := range sortedPaths { + methods, ok := paths[path].(map[string]any) + if !ok { + continue + } + sortedMethods := sortMethods(methods) + for _, method := range sortedMethods { + op, ok := methods[method].(map[string]any) + if !ok { + continue + } + summary, _ := op["summary"].(string) + b.WriteString(fmt.Sprintf("\n## %s %s\n", strings.ToUpper(method), path)) + if summary != "" { + b.WriteString(summary) + b.WriteString("\n") + } + + // Parameters + params, _ := op["parameters"].([]any) + bodyParams, nonBodyParams := splitParams(params) + + if len(nonBodyParams) > 0 { + b.WriteString("\n**Parameters:**\n") + b.WriteString("| Name | In | Type | Required | Description |\n") + b.WriteString("|------|----|------|----------|-------------|\n") + for _, p := range nonBodyParams { + pm, ok := p.(map[string]any) + if !ok { + continue + } + name, _ := pm["name"].(string) + in, _ := pm["in"].(string) + typ, _ := pm["type"].(string) + req, _ := pm["required"].(bool) + desc, _ := pm["description"].(string) + b.WriteString(fmt.Sprintf("| %s | %s | %s | %v | %s |\n", name, in, typ, req, desc)) + } + } + + if len(bodyParams) > 0 { + for _, p := range bodyParams { + pm, ok := p.(map[string]any) + if !ok { + continue + } + schema, _ := pm["schema"].(map[string]any) + refName := resolveRefName(schema) + if refName != "" { + b.WriteString(fmt.Sprintf("\n**Request body** (`%s`):\n", refName)) + renderSchemaFields(&b, refName, defs) + } + } + } + + // Responses + responses, _ := op["responses"].(map[string]any) + if len(responses) > 0 { + sortedCodes := make([]string, 0, len(responses)) + for code := range responses { + sortedCodes = append(sortedCodes, code) + } + sort.Strings(sortedCodes) + for _, code := range sortedCodes { + resp, ok := responses[code].(map[string]any) + if !ok { + continue + } + desc, _ := 
resp["description"].(string) + respSchema, _ := resp["schema"].(map[string]any) + refName := resolveRefName(respSchema) + if refName != "" { + b.WriteString(fmt.Sprintf("\n**Response %s** (`%s`): %s\n", code, refName, desc)) + renderSchemaFields(&b, refName, defs) + } else if desc != "" { + b.WriteString(fmt.Sprintf("\n**Response %s**: %s\n", code, desc)) + } + } + } + } + } + + return b.String() +} + +// sortMethods returns HTTP methods in a conventional order. +func sortMethods(methods map[string]any) []string { + order := map[string]int{"get": 0, "post": 1, "put": 2, "patch": 3, "delete": 4} + keys := make([]string, 0, len(methods)) + for k := range methods { + keys = append(keys, k) + } + sort.Slice(keys, func(i, j int) bool { + oi, oki := order[keys[i]] + oj, okj := order[keys[j]] + if !oki { + oi = 99 + } + if !okj { + oj = 99 + } + return oi < oj + }) + return keys +} + +// splitParams separates body parameters from non-body parameters. +func splitParams(params []any) (body, nonBody []any) { + for _, p := range params { + pm, ok := p.(map[string]any) + if !ok { + continue + } + if in, _ := pm["in"].(string); in == "body" { + body = append(body, p) + } else { + nonBody = append(nonBody, p) + } + } + return +} + +// resolveRefName extracts the definition name from a $ref or returns "". +func resolveRefName(schema map[string]any) string { + if schema == nil { + return "" + } + if ref, ok := schema["$ref"].(string); ok { + const prefix = "#/definitions/" + if strings.HasPrefix(ref, prefix) { + return ref[len(prefix):] + } + } + return "" +} + +// renderSchemaFields writes a markdown field table for a definition. 
+func renderSchemaFields(b *strings.Builder, defName string, defs map[string]any) { + if defs == nil { + return + } + def, ok := defs[defName].(map[string]any) + if !ok { + return + } + props, ok := def["properties"].(map[string]any) + if !ok || len(props) == 0 { + return + } + + // Sort fields + fields := make([]string, 0, len(props)) + for f := range props { + fields = append(fields, f) + } + sort.Strings(fields) + + b.WriteString("| Field | Type | Description |\n") + b.WriteString("|-------|------|-------------|\n") + for _, field := range fields { + prop, ok := props[field].(map[string]any) + if !ok { + continue + } + typ := schemaTypeString(prop) + desc, _ := prop["description"].(string) + b.WriteString(fmt.Sprintf("| %s | %s | %s |\n", field, typ, desc)) + } +} + +// schemaTypeString returns a human-readable type string for a schema property. +func schemaTypeString(prop map[string]any) string { + if ref := resolveRefName(prop); ref != "" { + return ref + } + typ, _ := prop["type"].(string) + if typ == "array" { + items, _ := prop["items"].(map[string]any) + if items != nil { + if ref := resolveRefName(items); ref != "" { + return "[]" + ref + } + it, _ := items["type"].(string) + if it != "" { + return "[]" + it + } + } + return "[]any" + } + if typ != "" { + return typ + } + return "object" +} + +// APISkillResponse is the JSON response for a single skill (?format=json). +type APISkillResponse struct { + Name string `json:"name"` + Description string `json:"description"` + Tags []string `json:"tags"` + SwaggerFragment map[string]any `json:"swagger_fragment,omitempty"` +} + +// ListAPISkillsEndpoint returns all skills (compact list without guides). 
+func ListAPISkillsEndpoint() echo.HandlerFunc { + return func(c echo.Context) error { + type compactSkill struct { + Name string `json:"name"` + Description string `json:"description"` + Tags []string `json:"tags"` + URL string `json:"url"` + } + skills := make([]compactSkill, len(skillDefs)) + for i, s := range skillDefs { + skills[i] = compactSkill{ + Name: s.Name, + Description: s.Description, + Tags: s.Tags, + URL: "/api/skills/" + s.Name, + } + } + return c.JSON(http.StatusOK, map[string]any{ + "skills": skills, + "hint": "Fetch GET {url} for a markdown API guide. Add ?format=json for a raw OpenAPI fragment.", + }) + } +} + +// GetAPISkillEndpoint returns a single skill by name. +// Query parameter ?format=json returns a filtered swagger fragment; +// default (markdown) returns a human/LLM-readable guide. +func GetAPISkillEndpoint() echo.HandlerFunc { + byName := make(map[string]*skillDef, len(skillDefs)) + for i := range skillDefs { + byName[skillDefs[i].Name] = &skillDefs[i] + } + + return func(c echo.Context) error { + name := c.Param("name") + skill, ok := byName[name] + if !ok { + return c.JSON(http.StatusNotFound, map[string]any{"error": "skill not found: " + name}) + } + + swState.init() + if !swState.ready { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "swagger spec not available"}) + } + + fragment := filterSwaggerByTags(swState.spec, skill.Tags) + + format := c.QueryParam("format") + if format == "json" { + return c.JSON(http.StatusOK, APISkillResponse{ + Name: skill.Name, + Description: skill.Description, + Tags: skill.Tags, + SwaggerFragment: fragment, + }) + } + + guide := swaggerToMarkdown(skill.Name, skill.Intro, fragment) + return c.Blob(http.StatusOK, "text/markdown; charset=utf-8", []byte(guide)) + } +} diff --git a/core/http/endpoints/localai/backend.go b/core/http/endpoints/localai/backend.go index f804f1b35c73..63e31cb5c7c5 100644 --- a/core/http/endpoints/localai/backend.go +++ 
b/core/http/endpoints/localai/backend.go @@ -37,6 +37,7 @@ func CreateBackendEndpointService(galleries []config.Gallery, systemState *syste // GetOpStatusEndpoint returns the job status // @Summary Returns the job status +// @Tags backends // @Success 200 {object} services.GalleryOpStatus "Response" // @Router /backends/jobs/{uuid} [get] func (mgs *BackendEndpointService) GetOpStatusEndpoint() echo.HandlerFunc { @@ -51,6 +52,7 @@ func (mgs *BackendEndpointService) GetOpStatusEndpoint() echo.HandlerFunc { // GetAllStatusEndpoint returns all the jobs status progress // @Summary Returns all the jobs status progress +// @Tags backends // @Success 200 {object} map[string]services.GalleryOpStatus "Response" // @Router /backends/jobs [get] func (mgs *BackendEndpointService) GetAllStatusEndpoint() echo.HandlerFunc { @@ -61,6 +63,7 @@ func (mgs *BackendEndpointService) GetAllStatusEndpoint() echo.HandlerFunc { // ApplyBackendEndpoint installs a new backend to a LocalAI instance // @Summary Install backends to LocalAI. +// @Tags backends // @Param request body GalleryBackend true "query params" // @Success 200 {object} schema.BackendResponse "Response" // @Router /backends/apply [post] @@ -88,6 +91,7 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() echo.HandlerFunc { // DeleteBackendEndpoint lets delete backends from a LocalAI instance // @Summary delete backends from LocalAI. 
+// @Tags backends
 // @Param name path string true "Backend name"
 // @Success 200 {object} schema.BackendResponse "Response"
 // @Router /backends/delete/{name} [post]
@@ -112,6 +116,7 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() echo.HandlerFunc {
 // ListBackendsEndpoint list the available backends configured in LocalAI
 // @Summary List all Backends
+// @Tags backends
 // @Success 200 {object} []gallery.GalleryBackend "Response"
 // @Router /backends [get]
 func (mgs *BackendEndpointService) ListBackendsEndpoint(systemState *system.SystemState) echo.HandlerFunc {
@@ -126,6 +131,7 @@ func (mgs *BackendEndpointService) ListBackendsEndpoint(systemState *system.Syst
 // ListBackendGalleriesEndpoint list the available galleries configured in LocalAI
 // @Summary List all Galleries
+// @Tags backends
 // @Success 200 {object} []config.Gallery "Response"
 // @Router /backends/galleries [get]
 // NOTE: This is different (and much simpler!) than above! This JUST lists the backend galleries that have been loaded, not their contents!
@@ -142,6 +148,7 @@ func (mgs *BackendEndpointService) ListBackendGalleriesEndpoint() echo.HandlerFu // ListAvailableBackendsEndpoint list the available backends in the galleries configured in LocalAI // @Summary List all available Backends +// @Tags backends // @Success 200 {object} []gallery.GalleryBackend "Response" // @Router /backends/available [get] func (mgs *BackendEndpointService) ListAvailableBackendsEndpoint(systemState *system.SystemState) echo.HandlerFunc { diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index 18016c579220..29b6f39810fe 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -8,6 +8,7 @@ import ( // BackendMonitorEndpoint returns the status of the specified backend // @Summary Backend monitor endpoint +// @Tags monitoring // @Param request body schema.BackendMonitorRequest true "Backend statistics request" // @Success 200 {object} proto.StatusResponse "Response" // @Router /backend/monitor [get] @@ -29,7 +30,8 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) echo.HandlerFunc } // BackendShutdownEndpoint shuts down the specified backend -// @Summary Backend monitor endpoint +// @Summary Backend shutdown endpoint +// @Tags monitoring // @Param request body schema.BackendMonitorRequest true "Backend statistics request" // @Router /backend/shutdown [post] func BackendShutdownEndpoint(bm *services.BackendMonitorService) echo.HandlerFunc { diff --git a/core/http/endpoints/localai/config_meta.go b/core/http/endpoints/localai/config_meta.go new file mode 100644 index 000000000000..8f9083c8fa71 --- /dev/null +++ b/core/http/endpoints/localai/config_meta.go @@ -0,0 +1,251 @@ +package localai + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "reflect" + "sort" + "strings" + + "dario.cat/mergo" + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + 
"github.com/mudler/LocalAI/core/config/meta" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/services" + "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/utils" + "github.com/mudler/xlog" + "gopkg.in/yaml.v3" +) + +// ConfigMetadataEndpoint returns field metadata for config fields. +// Without ?section, returns just the section index (lightweight). +// With ?section=, returns fields for that section only. +// With ?section=all, returns all fields grouped by section. +// @Summary List model configuration field metadata +// @Description Returns config field metadata. Use ?section= to filter by section, or omit for a section index. +// @Tags config +// @Produce json +// @Param section query string false "Section ID to filter (e.g. 'general', 'llm', 'parameters') or 'all' for everything" +// @Success 200 {object} map[string]any "Section index or filtered field metadata" +// @Router /api/models/config-metadata [get] +func ConfigMetadataEndpoint() echo.HandlerFunc { + return func(c echo.Context) error { + sectionParam := c.QueryParam("section") + + // No section param: return lightweight section index. + if sectionParam == "" { + sections := meta.DefaultSections() + type sectionInfo struct { + ID string `json:"id"` + Label string `json:"label"` + URL string `json:"url"` + } + index := make([]sectionInfo, len(sections)) + for i, s := range sections { + index[i] = sectionInfo{ + ID: s.ID, + Label: s.Label, + URL: "/api/models/config-metadata?section=" + s.ID, + } + } + return c.JSON(http.StatusOK, map[string]any{ + "hint": "Fetch a section URL to see its fields. Use ?section=all for everything.", + "sections": index, + }) + } + + md := meta.BuildConfigMetadata(reflect.TypeOf(config.ModelConfig{})) + + // section=all: return everything. + if sectionParam == "all" { + return c.JSON(http.StatusOK, md) + } + + // Filter to requested section. 
+ var filtered []meta.FieldMeta + for _, f := range md.Fields { + if f.Section == sectionParam { + filtered = append(filtered, f) + } + } + if len(filtered) == 0 { + return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown section: " + sectionParam}) + } + return c.JSON(http.StatusOK, filtered) + } +} + +// AutocompleteEndpoint handles dynamic autocomplete lookups for config fields. +// Static option lists (quantizations, cache types, diffusers pipelines/schedulers) +// are embedded directly in the field metadata Options; only truly dynamic values +// that require runtime lookup are served here. +// @Summary Get dynamic autocomplete values for a config field +// @Description Returns runtime-resolved values for dynamic providers (backends, models) +// @Tags config +// @Produce json +// @Param provider path string true "Provider name (backends, models, models:chat, models:tts, models:transcript, models:vad)" +// @Success 200 {object} map[string]any "values array" +// @Router /api/models/config-metadata/autocomplete/{provider} [get] +func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + provider := c.Param("provider") + var values []string + + switch { + case provider == meta.ProviderBackends: + installedBackends, err := gallery.ListSystemBackends(appConfig.SystemState) + if err == nil { + for name := range installedBackends { + values = append(values, name) + } + } + sort.Strings(values) + + case provider == meta.ProviderModels: + modelConfigs := cl.GetAllModelsConfigs() + for _, cfg := range modelConfigs { + values = append(values, cfg.Name) + } + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) + values = append(values, modelsWithoutConfig...) 
+ sort.Strings(values) + + case strings.HasPrefix(provider, "models:"): + capability := strings.TrimPrefix(provider, "models:") + var filterFn config.ModelConfigFilterFn + switch capability { + case "chat": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_CHAT) + case "tts": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_TTS) + case "vad": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_VAD) + case "transcript": + filterFn = config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT) + default: + filterFn = config.NoFilterFn + } + filteredConfigs := cl.GetModelConfigsByFilter(filterFn) + for _, cfg := range filteredConfigs { + values = append(values, cfg.Name) + } + sort.Strings(values) + + default: + return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown provider: " + provider}) + } + + return c.JSON(http.StatusOK, map[string]any{"values": values}) + } +} + +// PatchConfigEndpoint handles PATCH requests to partially update a model config +// using nested JSON merge. 
+// @Summary Partially update a model configuration +// @Description Deep-merges the JSON patch body into the existing model config +// @Tags config +// @Accept json +// @Produce json +// @Param name path string true "Model name" +// @Success 200 {object} map[string]any "success message" +// @Router /api/models/config-json/{name} [patch] +func PatchConfigEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("name") + if decoded, err := url.PathUnescape(modelName); err == nil { + modelName = decoded + } + if modelName == "" { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"}) + } + + modelConfig, exists := cl.GetModelConfig(modelName) + if !exists { + return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"}) + } + + // Read patch body + patchBody, err := io.ReadAll(c.Request().Body) + if err != nil || len(patchBody) == 0 { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "request body is empty or unreadable"}) + } + + // Validate patch body is valid JSON + var patchMap map[string]any + if err := json.Unmarshal(patchBody, &patchMap); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid JSON: " + err.Error()}) + } + + // Marshal existing config to JSON + existingJSON, err := json.Marshal(modelConfig) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal existing config"}) + } + + // Deep-merge patch into existing + var existingMap map[string]any + if err := json.Unmarshal(existingJSON, &existingMap); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to parse existing config"}) + } + + if err := mergo.Merge(&existingMap, patchMap, mergo.WithOverride); err != nil { + return c.JSON(http.StatusInternalServerError, 
map[string]any{"error": "failed to merge configs: " + err.Error()}) + } + + // Marshal merged config back to JSON + mergedJSON, err := json.Marshal(existingMap) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal merged config"}) + } + + // Unmarshal to ModelConfig for validation + var updatedConfig config.ModelConfig + if err := json.Unmarshal(mergedJSON, &updatedConfig); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "merged config is invalid: " + err.Error()}) + } + + // Validate + if valid, err := updatedConfig.Validate(); !valid { + errMsg := "validation failed" + if err != nil { + errMsg = err.Error() + } + return c.JSON(http.StatusBadRequest, map[string]any{"error": errMsg}) + } + + // Write as YAML to disk + configPath := modelConfig.GetModelConfigFile() + if err := utils.VerifyPath(configPath, appConfig.SystemState.Model.ModelsPath); err != nil { + return c.JSON(http.StatusForbidden, map[string]any{"error": "config path not trusted: " + err.Error()}) + } + + yamlData, err := yaml.Marshal(updatedConfig) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal YAML"}) + } + + if err := os.WriteFile(configPath, yamlData, 0644); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to write config file"}) + } + + // Reload configs + if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to reload configs: " + err.Error()}) + } + + if err := cl.Preload(appConfig.SystemState.Model.ModelsPath); err != nil { + xlog.Warn("Failed to preload after PATCH", "error", err) + } + + return c.JSON(http.StatusOK, map[string]any{ + "success": true, + "message": fmt.Sprintf("Model '%s' updated successfully", modelName), + }) + } +} diff --git 
a/core/http/endpoints/localai/detection.go b/core/http/endpoints/localai/detection.go index 77a0c7256526..3dcbbde6d783 100644 --- a/core/http/endpoints/localai/detection.go +++ b/core/http/endpoints/localai/detection.go @@ -13,6 +13,7 @@ import ( // DetectionEndpoint is the LocalAI Detection endpoint https://localai.io/docs/api-reference/detection // @Summary Detects objects in the input image. +// @Tags detection // @Param request body schema.DetectionRequest true "query params" // @Success 200 {object} schema.DetectionResponse "Response" // @Router /v1/detection [post] diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index 5c87e6d05e19..56bff7272511 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -40,6 +40,7 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, backendGaller // GetOpStatusEndpoint returns the job status // @Summary Returns the job status +// @Tags models // @Success 200 {object} services.GalleryOpStatus "Response" // @Router /models/jobs/{uuid} [get] func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() echo.HandlerFunc { @@ -54,6 +55,7 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() echo.HandlerFunc { // GetAllStatusEndpoint returns all the jobs status progress // @Summary Returns all the jobs status progress +// @Tags models // @Success 200 {object} map[string]services.GalleryOpStatus "Response" // @Router /models/jobs [get] func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() echo.HandlerFunc { @@ -64,6 +66,7 @@ func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() echo.HandlerFunc // ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery // @Summary Install models to LocalAI. 
+// @Tags models // @Param request body GalleryModel true "query params" // @Success 200 {object} schema.GalleryResponse "Response" // @Router /models/apply [post] @@ -93,6 +96,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() echo.Handler // DeleteModelGalleryEndpoint lets delete models from a LocalAI instance // @Summary delete models to LocalAI. +// @Tags models // @Param name path string true "Model name" // @Success 200 {object} schema.GalleryResponse "Response" // @Router /models/delete/{name} [post] @@ -118,7 +122,8 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() echo.Handle // ListModelFromGalleryEndpoint list the available models for installation from the active galleries // @Summary List installable models. -// @Success 200 {object} []gallery.GalleryModel "Response" +// @Tags models +// @Success 200 {object} []gallery.Metadata "Response" // @Router /models/available [get] func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint(systemState *system.SystemState) echo.HandlerFunc { return func(c echo.Context) error { @@ -149,6 +154,7 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint(systemState // ListModelGalleriesEndpoint list the available galleries configured in LocalAI // @Summary List all Galleries +// @Tags models // @Success 200 {object} []config.Gallery "Response" // @Router /models/galleries [get] // NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents! diff --git a/core/http/endpoints/localai/get_token_metrics.go b/core/http/endpoints/localai/get_token_metrics.go index 69c408e50b76..36b0301b78f9 100644 --- a/core/http/endpoints/localai/get_token_metrics.go +++ b/core/http/endpoints/localai/get_token_metrics.go @@ -16,6 +16,7 @@ import ( // TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID // // @Summary Get TokenMetrics for Active Slot. 
+// @Tags tokenize // @Accept json // @Produce audio/x-wav // @Success 200 {string} binary "generated audio/wav file" diff --git a/core/http/endpoints/localai/mcp.go b/core/http/endpoints/localai/mcp.go index 0ff75f4a9392..ef879a737ecd 100644 --- a/core/http/endpoints/localai/mcp.go +++ b/core/http/endpoints/localai/mcp.go @@ -52,6 +52,7 @@ type MCPErrorEvent struct { // which handles MCP tool injection and server-side execution. // Both streaming and non-streaming modes use standard OpenAI response format. // @Summary MCP chat completions with automatic tool execution +// @Tags mcp // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/mcp/chat/completions [post] diff --git a/core/http/endpoints/localai/metrics.go b/core/http/endpoints/localai/metrics.go index a5f08a7f6444..9027e738921f 100644 --- a/core/http/endpoints/localai/metrics.go +++ b/core/http/endpoints/localai/metrics.go @@ -10,7 +10,9 @@ import ( // LocalAIMetricsEndpoint returns the metrics endpoint for LocalAI // @Summary Prometheus metrics endpoint -// @Param request body config.Gallery true "Gallery details" +// @Tags monitoring +// @Produce text/plain +// @Success 200 {string} string "Prometheus metrics" // @Router /metrics [get] func LocalAIMetricsEndpoint() echo.HandlerFunc { return echo.WrapHandler(promhttp.Handler()) diff --git a/core/http/endpoints/localai/p2p.go b/core/http/endpoints/localai/p2p.go index cc630be4f440..e168b6f9a196 100644 --- a/core/http/endpoints/localai/p2p.go +++ b/core/http/endpoints/localai/p2p.go @@ -9,6 +9,7 @@ import ( // ShowP2PNodes returns the P2P Nodes // @Summary Returns available P2P nodes +// @Tags p2p // @Success 200 {object} []schema.P2PNodesResponse "Response" // @Router /api/p2p [get] func ShowP2PNodes(appConfig *config.ApplicationConfig) echo.HandlerFunc { @@ -24,6 +25,7 @@ func ShowP2PNodes(appConfig *config.ApplicationConfig) echo.HandlerFunc { // ShowP2PToken returns the 
P2P token // @Summary Show the P2P token +// @Tags p2p // @Success 200 {string} string "Response" // @Router /api/p2p/token [get] func ShowP2PToken(appConfig *config.ApplicationConfig) echo.HandlerFunc { diff --git a/core/http/endpoints/localai/system.go b/core/http/endpoints/localai/system.go index a3831e18483a..3e5652117bc5 100644 --- a/core/http/endpoints/localai/system.go +++ b/core/http/endpoints/localai/system.go @@ -9,6 +9,7 @@ import ( // SystemInformations returns the system informations // @Summary Show the LocalAI instance information +// @Tags monitoring // @Success 200 {object} schema.SystemInformationResponse "Response" // @Router /system [get] func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { diff --git a/core/http/endpoints/localai/tokenize.go b/core/http/endpoints/localai/tokenize.go index 23eec48c7545..439b7a7e618a 100644 --- a/core/http/endpoints/localai/tokenize.go +++ b/core/http/endpoints/localai/tokenize.go @@ -11,6 +11,7 @@ import ( // TokenizeEndpoint exposes a REST API to tokenize the content // @Summary Tokenize the input. +// @Tags tokenize // @Param request body schema.TokenizeRequest true "Request" // @Success 200 {object} schema.TokenizeResponse "Response" // @Router /v1/tokenize [post] diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 49bba5528bb1..40e4881910f7 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -17,6 +17,7 @@ import ( // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech // // @Summary Generates audio from the input text. 
+// @Tags audio // @Accept json // @Produce audio/x-wav // @Param request body schema.TTSRequest true "query params" diff --git a/core/http/endpoints/localai/vad.go b/core/http/endpoints/localai/vad.go index 155574c85102..84521af2462c 100644 --- a/core/http/endpoints/localai/vad.go +++ b/core/http/endpoints/localai/vad.go @@ -12,6 +12,7 @@ import ( // VADEndpoint is Voice-Activation-Detection endpoint // @Summary Detect voice fragments in an audio stream +// @Tags audio // @Accept json // @Param request body schema.VADRequest true "query params" // @Success 200 {object} proto.VADResponse "Response" diff --git a/core/http/endpoints/localai/video.go b/core/http/endpoints/localai/video.go index da33a03734ac..8a65ae5fd9e0 100644 --- a/core/http/endpoints/localai/video.go +++ b/core/http/endpoints/localai/video.go @@ -62,6 +62,7 @@ func downloadFile(url string) (string, error) { */ // VideoEndpoint // @Summary Creates a video given a prompt. +// @Tags video // @Param request body schema.VideoRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /video [post] diff --git a/core/http/endpoints/localai/vram.go b/core/http/endpoints/localai/vram.go new file mode 100644 index 000000000000..e31c2d12f9e5 --- /dev/null +++ b/core/http/endpoints/localai/vram.go @@ -0,0 +1,154 @@ +package localai + +import ( + "context" + "fmt" + "net/http" + "path" + "path/filepath" + "strings" + "time" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/vram" +) + +type vramEstimateRequest struct { + Model string `json:"model"` // model name (must be installed) + ContextSize uint32 `json:"context_size,omitempty"` // context length to estimate for (default 8192) + GPULayers int `json:"gpu_layers,omitempty"` // number of layers to offload to GPU (0 = all) + KVQuantBits int `json:"kv_quant_bits,omitempty"` // KV cache quantization bits (0 = fp16) +} + +type vramEstimateResponse struct { + 
vram.EstimateResult + ContextNote string `json:"context_note,omitempty"` // note when context_size was defaulted + ModelMaxContext uint64 `json:"model_max_context,omitempty"` // model's trained maximum context length +} + +// resolveModelURI converts a relative model path to a file:// URI so the +// size resolver can stat it on disk. URIs that already have a scheme are +// returned unchanged. +func resolveModelURI(uri, modelsPath string) string { + if strings.Contains(uri, "://") { + return uri + } + return "file://" + filepath.Join(modelsPath, uri) +} + +// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an +// installed model configuration. For uninstalled models (gallery URLs), use +// the gallery-level estimates in /api/models instead. +// @Summary Estimate VRAM usage for a model +// @Description Estimates VRAM based on model weight files, context size, and GPU layers +// @Tags config +// @Accept json +// @Produce json +// @Param request body vramEstimateRequest true "VRAM estimation parameters" +// @Success 200 {object} vramEstimateResponse "VRAM estimate" +// @Router /api/models/vram-estimate [post] +func VRAMEstimateEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true} + + return func(c echo.Context) error { + var req vramEstimateRequest + if err := c.Bind(&req); err != nil { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid request body"}) + } + + if req.Model == "" { + return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"}) + } + + modelConfig, exists := cl.GetModelConfig(req.Model) + if !exists { + return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"}) + } + + modelsPath := appConfig.SystemState.Model.ModelsPath + + // Build file inputs from the model's download files + var files []vram.FileInput + var 
firstGGUF string + for _, f := range modelConfig.DownloadFiles { + ext := strings.ToLower(path.Ext(path.Base(f.Filename))) + if weightExts[ext] { + resolved := resolveModelURI(string(f.URI), modelsPath) + files = append(files, vram.FileInput{URI: resolved, Size: 0}) + if firstGGUF == "" && ext == ".gguf" { + firstGGUF = resolved + } + } + } + + // Also include the main model file if it looks like a weight file + if modelConfig.Model != "" { + ext := strings.ToLower(path.Ext(path.Base(modelConfig.Model))) + if weightExts[ext] { + resolved := resolveModelURI(modelConfig.Model, modelsPath) + files = append(files, vram.FileInput{URI: resolved, Size: 0}) + if firstGGUF == "" && ext == ".gguf" { + firstGGUF = resolved + } + } + } + + // Include the vision projector (mmproj) file — it is loaded entirely + // into GPU VRAM alongside the main model. + if modelConfig.MMProj != "" { + ext := strings.ToLower(path.Ext(path.Base(modelConfig.MMProj))) + if weightExts[ext] { + resolved := resolveModelURI(modelConfig.MMProj, modelsPath) + files = append(files, vram.FileInput{URI: resolved, Size: 0}) + } + } + + if len(files) == 0 { + return c.JSON(http.StatusOK, map[string]any{ + "message": "no weight files found for estimation", + }) + } + + contextDefaulted := false + opts := vram.EstimateOptions{ + ContextLength: req.ContextSize, + GPULayers: req.GPULayers, + KVQuantBits: req.KVQuantBits, + } + if opts.ContextLength == 0 { + if modelConfig.ContextSize != nil { + opts.ContextLength = uint32(*modelConfig.ContextSize) + } else { + opts.ContextLength = 8192 + contextDefaulted = true + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + result, err := vram.Estimate(ctx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader()) + if err != nil { + return c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()}) + } + + resp := vramEstimateResponse{EstimateResult: result} + + // When context was 
defaulted to 8192, read the GGUF metadata to report + // the model's trained maximum context length so callers know the estimate + // may be conservative. + if contextDefaulted && firstGGUF != "" { + ggufMeta, err := vram.DefaultCachedGGUFReader().ReadMetadata(ctx, firstGGUF) + if err == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 { + resp.ModelMaxContext = ggufMeta.MaximumContextLength + resp.ContextNote = fmt.Sprintf( + "Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.", + ggufMeta.MaximumContextLength, + ) + } + } + + return c.JSON(http.StatusOK, resp) + } +} diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 871084054284..da84f58b6209 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -56,6 +56,7 @@ func mergeToolCallDeltas(existing []schema.ToolCall, deltas []schema.ToolCall) [ // ChatEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create // @Summary Generate a chat completions for a given prompt and model. +// @Tags inference // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 7b094cb3bf22..069bc33a60f5 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -21,6 +21,7 @@ import ( // CompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions // @Summary Generate completions for a given prompt and model. 
+// @Tags inference // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/completions [post] diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index 917a05a2455e..c74e2d1f7546 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -20,6 +20,7 @@ import ( // EditEndpoint is the OpenAI edit API endpoint // @Summary OpenAI edit endpoint +// @Tags inference // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/edits [post] diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go index b88f3eb03795..04ffbf1facec 100644 --- a/core/http/endpoints/openai/embeddings.go +++ b/core/http/endpoints/openai/embeddings.go @@ -18,6 +18,7 @@ import ( // EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings // @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. +// @Tags embeddings // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/embeddings [post] diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 32834a923331..19e530d7c52e 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -68,6 +68,7 @@ func downloadFile(url string) (string, error) { */ // ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create // @Summary Creates an image given a prompt. 
+// @Tags images // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/images/generations [post] diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index 1f722bacf90e..ff0b08225392 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -12,6 +12,7 @@ import ( // ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models // @Summary List and describe the various models available in the API. +// @Tags models // @Success 200 {object} schema.ModelsDataResponse "Response" // @Router /v1/models [get] func ListModelsEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, db ...*gorm.DB) echo.HandlerFunc { diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go index c52fe1914c2d..ffbfb9faac4d 100644 --- a/core/http/endpoints/openai/transcription.go +++ b/core/http/endpoints/openai/transcription.go @@ -21,6 +21,7 @@ import ( // TranscriptEndpoint is the OpenAI Whisper API endpoint https://platform.openai.com/docs/api-reference/audio/create // @Summary Transcribes audio into the input language. 
+// @Tags audio // @accept multipart/form-data // @Param model formData string true "model" // @Param file formData file true "file" diff --git a/core/http/endpoints/openresponses/responses.go b/core/http/endpoints/openresponses/responses.go index 37c59b5687c4..dc2e1301560d 100644 --- a/core/http/endpoints/openresponses/responses.go +++ b/core/http/endpoints/openresponses/responses.go @@ -26,6 +26,7 @@ import ( // ResponsesEndpoint is the Open Responses API endpoint // https://www.openresponses.org/specification // @Summary Create a response using the Open Responses API +// @Tags inference // @Param request body schema.OpenResponsesRequest true "Request body" // @Success 200 {object} schema.ORResponseResource "Response" // @Router /v1/responses [post] @@ -3000,6 +3001,7 @@ func convertORToolsToOpenAIFormat(orTools []schema.ORFunctionTool) []functions.T // GetResponseEndpoint returns a handler for GET /responses/:id // This endpoint is used for polling background responses or resuming streaming // @Summary Get a response by ID +// @Tags inference // @Description Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses. 
// @Param id path string true "Response ID" // @Param stream query string false "Set to 'true' to resume streaming" @@ -3141,6 +3143,7 @@ func handleStreamResume(c echo.Context, store *ResponseStore, responseID string, // CancelResponseEndpoint returns a handler for POST /responses/:id/cancel // This endpoint cancels a background response if it's still in progress // @Summary Cancel a response +// @Tags inference // @Description Cancel a background response if it's still in progress // @Param id path string true "Response ID" // @Success 200 {object} schema.ORResponseResource "Response" diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index c91986ab6439..5972fb6153ca 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -132,6 +132,113 @@ func RegisterLocalAIRoutes(router *echo.Echo, }{Version: internal.PrintableVersion()}) }) + // Agent discovery endpoint + router.GET("/.well-known/localai.json", func(c echo.Context) error { + return c.JSON(200, map[string]any{ + "version": internal.PrintableVersion(), + // Flat endpoint list for backwards compatibility + "endpoints": map[string]any{ + "models": "/v1/models", + "chat_completions": "/v1/chat/completions", + "completions": "/v1/completions", + "embeddings": "/v1/embeddings", + "config_metadata": "/api/models/config-metadata", + "config_json": "/api/models/config-json/:name", + "config_patch": "/api/models/config-json/:name", + "autocomplete": "/api/models/config-metadata/autocomplete/:provider", + "vram_estimate": "/api/models/vram-estimate", + "tts": "/tts", + "transcription": "/v1/audio/transcriptions", + "image_generation": "/v1/images/generations", + "swagger": "/swagger/index.html", + "skills": "/api/skills", + }, + // Categorized endpoint groups for structured discovery + "endpoint_groups": map[string]any{ + "openai_compatible": map[string]string{ + "models": "/v1/models", + "chat_completions": "/v1/chat/completions", + "completions": "/v1/completions", + 
"embeddings": "/v1/embeddings", + "transcription": "/v1/audio/transcriptions", + "image_generation": "/v1/images/generations", + }, + "config_management": map[string]string{ + "config_metadata": "/api/models/config-metadata", + "config_json": "/api/models/config-json/:name", + "config_patch": "/api/models/config-json/:name", + "autocomplete": "/api/models/config-metadata/autocomplete/:provider", + "vram_estimate": "/api/models/vram-estimate", + }, + "model_management": map[string]string{ + "list_gallery": "/models/available", + "install": "/models/apply", + "delete": "/models/delete/:name", + "edit": "/models/edit/:name", + "import": "/models/import", + "reload": "/models/reload", + }, + "ai_functions": map[string]string{ + "tts": "/tts", + "vad": "/vad", + "video": "/video", + "detection": "/v1/detection", + "tokenize": "/v1/tokenize", + }, + "monitoring": map[string]string{ + "metrics": "/metrics", + "backend_monitor": "/backend/monitor", + "backend_shutdown": "/backend/shutdown", + "system": "/system", + "version": "/version", + "traces": "/api/traces", + }, + "mcp": map[string]string{ + "chat_completions": "/v1/mcp/chat/completions", + "servers": "/v1/mcp/servers/:model", + "prompts": "/v1/mcp/prompts/:model", + "resources": "/v1/mcp/resources/:model", + }, + "p2p": map[string]string{ + "nodes": "/api/p2p", + "token": "/api/p2p/token", + }, + "agents": map[string]string{ + "tasks": "/api/agent/tasks", + "jobs": "/api/agent/jobs", + "execute": "/api/agent/jobs/execute", + }, + "settings": map[string]string{ + "get": "/api/settings", + "update": "/api/settings", + }, + "stores": map[string]string{ + "set": "/stores/set", + "get": "/stores/get", + "find": "/stores/find", + "delete": "/stores/delete", + }, + "docs": map[string]string{ + "swagger": "/swagger/index.html", + "skills": "/api/skills", + }, + }, + "capabilities": map[string]bool{ + "config_metadata": true, + "config_patch": true, + "vram_estimate": true, + "mcp": !appConfig.DisableMCP, + "agents": 
appConfig.AgentPool.Enabled, + "p2p": appConfig.P2PToken != "", + "tracing": true, + }, + }) + }) + + // API skills for agent discovery (no auth — agents should discover these without credentials) + router.GET("/api/skills", localai.ListAPISkillsEndpoint()) + router.GET("/api/skills/:name", localai.GetAPISkillEndpoint()) + router.GET("/api/features", func(c echo.Context) error { return c.JSON(200, map[string]bool{ "agents": appConfig.AgentPool.Enabled, diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 81d9b4275ef0..ad539c7ee1f0 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -701,6 +701,18 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model return c.JSON(http.StatusOK, modelConfig) }, adminMiddleware) + // Config metadata API - returns field metadata for all ~170 config fields + app.GET("/api/models/config-metadata", localai.ConfigMetadataEndpoint(), adminMiddleware) + + // Autocomplete providers for config fields (dynamic values only) + app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware) + + // PATCH config endpoint - partial update using nested JSON merge + app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware) + + // VRAM estimation endpoint + app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware) + // Get installed model YAML config for the React model editor app.GET("/api/models/edit/:name", func(c echo.Context) error { modelName := c.Param("name") @@ -1307,3 +1319,4 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model }) }, adminMiddleware) } + diff --git a/core/schema/agent_jobs.go b/core/schema/agent_jobs.go index ac5d13fc6c9f..51f28e50ffab 100644 --- a/core/schema/agent_jobs.go +++ b/core/schema/agent_jobs.go @@ -126,9 +126,9 @@ type JobExecutionRequest struct { // 
JobExecutionResponse represents the response after creating a job type JobExecutionResponse struct { - JobID string `json:"job_id"` - Status string `json:"status"` - URL string `json:"url"` // URL to check job status + JobID string `json:"job_id"` // unique job identifier + Status string `json:"status"` // initial status (pending) + URL string `json:"url"` // URL to poll for job status } // TasksFile represents the structure of agent_tasks.json diff --git a/core/schema/localai.go b/core/schema/localai.go index 6f98bf320eee..0f130f95c371 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -33,21 +33,21 @@ type GalleryResponse struct { type VideoRequest struct { BasicModelRequest - Prompt string `json:"prompt" yaml:"prompt"` - NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"` - StartImage string `json:"start_image" yaml:"start_image"` - EndImage string `json:"end_image" yaml:"end_image"` - Width int32 `json:"width" yaml:"width"` - Height int32 `json:"height" yaml:"height"` - NumFrames int32 `json:"num_frames" yaml:"num_frames"` - FPS int32 `json:"fps" yaml:"fps"` - Seconds string `json:"seconds,omitempty" yaml:"seconds,omitempty"` - Size string `json:"size,omitempty" yaml:"size,omitempty"` - InputReference string `json:"input_reference,omitempty" yaml:"input_reference,omitempty"` - Seed int32 `json:"seed" yaml:"seed"` - CFGScale float32 `json:"cfg_scale" yaml:"cfg_scale"` - Step int32 `json:"step" yaml:"step"` - ResponseFormat string `json:"response_format" yaml:"response_format"` + Prompt string `json:"prompt" yaml:"prompt"` // text description of the video to generate + NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"` // things to avoid in the output + StartImage string `json:"start_image" yaml:"start_image"` // URL or base64 of the first frame + EndImage string `json:"end_image" yaml:"end_image"` // URL or base64 of the last frame + Width int32 `json:"width" yaml:"width"` // output width in pixels + Height 
int32 `json:"height" yaml:"height"` // output height in pixels + NumFrames int32 `json:"num_frames" yaml:"num_frames"` // total number of frames to generate + FPS int32 `json:"fps" yaml:"fps"` // frames per second + Seconds string `json:"seconds,omitempty" yaml:"seconds,omitempty"` // duration in seconds (alternative to num_frames) + Size string `json:"size,omitempty" yaml:"size,omitempty"` // WxH shorthand (e.g. "512x512") + InputReference string `json:"input_reference,omitempty" yaml:"input_reference,omitempty"` // reference image or video URL + Seed int32 `json:"seed" yaml:"seed"` // random seed for reproducibility + CFGScale float32 `json:"cfg_scale" yaml:"cfg_scale"` // classifier-free guidance scale + Step int32 `json:"step" yaml:"step"` // number of diffusion steps + ResponseFormat string `json:"response_format" yaml:"response_format"` // output format (url or b64_json) } // @Description TTS request body @@ -55,7 +55,7 @@ type TTSRequest struct { BasicModelRequest Input string `json:"input" yaml:"input"` // text input Voice string `json:"voice" yaml:"voice"` // voice audio file or speaker id - Backend string `json:"backend" yaml:"backend"` + Backend string `json:"backend" yaml:"backend"` // backend engine override Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model Format string `json:"response_format,omitempty" yaml:"response_format,omitempty"` // (optional) output format Stream bool `json:"stream,omitempty" yaml:"stream,omitempty"` // (optional) enable streaming TTS @@ -65,7 +65,7 @@ type TTSRequest struct { // @Description VAD request body type VADRequest struct { BasicModelRequest - Audio []float32 `json:"audio" yaml:"audio"` // model name or full path + Audio []float32 `json:"audio" yaml:"audio"` // raw audio samples as float32 PCM } type VADSegment struct { @@ -146,13 +146,13 @@ type SysInfoModel struct { } type SystemInformationResponse struct { - Backends []string `json:"backends"` - 
Models []SysInfoModel `json:"loaded_models"` + Backends []string `json:"backends"` // available backend engines + Models []SysInfoModel `json:"loaded_models"` // currently loaded models } type DetectionRequest struct { BasicModelRequest - Image string `json:"image"` + Image string `json:"image"` // URL or base64-encoded image to analyze } type DetectionResponse struct { diff --git a/core/schema/tokenize.go b/core/schema/tokenize.go index e481f186333f..5129b6ab7a62 100644 --- a/core/schema/tokenize.go +++ b/core/schema/tokenize.go @@ -2,9 +2,9 @@ package schema type TokenizeRequest struct { BasicModelRequest - Content string `json:"content"` + Content string `json:"content"` // text to tokenize } type TokenizeResponse struct { - Tokens []int32 `json:"tokens"` + Tokens []int32 `json:"tokens"` // token IDs } diff --git a/pkg/vram/gguf_reader.go b/pkg/vram/gguf_reader.go index 631c017f7418..3f731b482d3b 100644 --- a/pkg/vram/gguf_reader.go +++ b/pkg/vram/gguf_reader.go @@ -34,10 +34,11 @@ func (defaultGGUFReader) ReadMetadata(ctx context.Context, uri string) (*GGUFMet func ggufFileToMeta(f *gguf.GGUFFile) *GGUFMeta { arch := f.Architecture() meta := &GGUFMeta{ - BlockCount: uint32(arch.BlockCount), - EmbeddingLength: uint32(arch.EmbeddingLength), - HeadCount: uint32(arch.AttentionHeadCount), - HeadCountKV: uint32(arch.AttentionHeadCountKV), + BlockCount: uint32(arch.BlockCount), + EmbeddingLength: uint32(arch.EmbeddingLength), + HeadCount: uint32(arch.AttentionHeadCount), + HeadCountKV: uint32(arch.AttentionHeadCountKV), + MaximumContextLength: arch.MaximumContextLength, } if meta.HeadCountKV == 0 { meta.HeadCountKV = meta.HeadCount diff --git a/pkg/vram/types.go b/pkg/vram/types.go index cda76aff6378..35180185de32 100644 --- a/pkg/vram/types.go +++ b/pkg/vram/types.go @@ -15,10 +15,11 @@ type SizeResolver interface { // GGUFMeta holds parsed GGUF metadata used for VRAM estimation. 
type GGUFMeta struct { - BlockCount uint32 - EmbeddingLength uint32 - HeadCount uint32 - HeadCountKV uint32 + BlockCount uint32 + EmbeddingLength uint32 + HeadCount uint32 + HeadCountKV uint32 + MaximumContextLength uint64 } // GGUFMetadataReader reads GGUF metadata from a URI (e.g. via HTTP Range). @@ -35,8 +36,8 @@ type EstimateOptions struct { // EstimateResult holds estimated download size and VRAM with display strings. type EstimateResult struct { - SizeBytes uint64 - SizeDisplay string - VRAMBytes uint64 - VRAMDisplay string + SizeBytes uint64 `json:"sizeBytes"` // total model weight size in bytes + SizeDisplay string `json:"sizeDisplay"` // human-readable size (e.g. "4.2 GB") + VRAMBytes uint64 `json:"vrambytes"` // estimated VRAM usage in bytes + VRAMDisplay string `json:"vramdisplay"` // human-readable VRAM (e.g. "6.1 GB") } diff --git a/swagger/docs.go b/swagger/docs.go index 52c3be58def2..74521ff0bb26 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -22,8 +22,609 @@ const docTemplate = `{ "host": "{{.Host}}", "basePath": "{{.BasePath}}", "paths": { + "/api/agent/jobs": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "List agent jobs", + "parameters": [ + { + "type": "string", + "description": "Filter by task ID", + "name": "task_id", + "in": "query" + }, + { + "type": "string", + "description": "Filter by status (pending, running, completed, failed, cancelled)", + "name": "status", + "in": "query" + }, + { + "type": "integer", + "description": "Max number of jobs to return", + "name": "limit", + "in": "query" + }, + { + "type": "string", + "description": "Set to 'true' for admin cross-user listing", + "name": "all_users", + "in": "query" + } + ], + "responses": { + "200": { + "description": "jobs", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Job" + } + } + } + } + } + }, + "/api/agent/jobs/execute": { + "post": { + "consumes": [ + "application/json" + ], + 
"produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Execute an agent job", + "parameters": [ + { + "description": "Job execution request", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.JobExecutionRequest" + } + } + ], + "responses": { + "201": { + "description": "job created", + "schema": { + "$ref": "#/definitions/schema.JobExecutionResponse" + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/jobs/{id}": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Get an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "job", + "schema": { + "$ref": "#/definitions/schema.Job" + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "delete": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Delete an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/jobs/{id}/cancel": { + "post": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Cancel an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": 
"message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "List agent tasks", + "parameters": [ + { + "type": "string", + "description": "Set to 'true' for admin cross-user listing", + "name": "all_users", + "in": "query" + } + ], + "responses": { + "200": { + "description": "tasks", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Task" + } + } + } + } + }, + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Create a new agent task", + "parameters": [ + { + "description": "Task definition", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.Task" + } + } + ], + "responses": { + "201": { + "description": "id", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks/{id}": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Get an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "task", + "schema": { + "$ref": "#/definitions/schema.Task" + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, 
+ "put": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Update an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + }, + { + "description": "Updated task definition", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.Task" + } + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "delete": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Delete an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks/{name}/execute": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Execute an agent task by name", + "parameters": [ + { + "type": "string", + "description": "Task name", + "name": "name", + "in": "path", + "required": true + }, + { + "description": "Optional template parameters", + "name": "parameters", + "in": "body", + "schema": { + "type": "object" + } + } + ], + "responses": { + "201": { + "description": "job created", + "schema": { + "$ref": 
"#/definitions/schema.JobExecutionResponse" + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/models/config-json/{name}": { + "patch": { + "description": "Deep-merges the JSON patch body into the existing model config", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Partially update a model configuration", + "parameters": [ + { + "type": "string", + "description": "Model name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "success message", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + } + }, + "/api/models/config-metadata": { + "get": { + "description": "Returns ~170 config fields with types, UI hints, sections, and options", + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "List all model configuration field metadata", + "responses": { + "200": { + "description": "List of field metadata", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/meta.FieldMeta" + } + } + } + } + } + }, + "/api/models/config-metadata/autocomplete/{provider}": { + "get": { + "description": "Returns runtime-resolved values for dynamic providers (backends, models)", + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Get dynamic autocomplete values for a config field", + "parameters": [ + { + "type": "string", + "description": "Provider name (backends, models, models:chat, models:tts, models:transcript, models:vad)", + "name": "provider", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "values array", + "schema": { + "type": "object", + 
"additionalProperties": true + } + } + } + } + }, + "/api/models/vram-estimate": { + "post": { + "description": "Estimates VRAM based on model weight files, context size, and GPU layers", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Estimate VRAM usage for a model", + "parameters": [ + { + "description": "VRAM estimation parameters", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/localai.vramEstimateRequest" + } + } + ], + "responses": { + "200": { + "description": "VRAM estimate", + "schema": { + "$ref": "#/definitions/localai.vramEstimateResponse" + } + } + } + } + }, "/api/p2p": { "get": { + "tags": [ + "p2p" + ], "summary": "Returns available P2P nodes", "responses": { "200": { @@ -40,6 +641,9 @@ const docTemplate = `{ }, "/api/p2p/token": { "get": { + "tags": [ + "p2p" + ], "summary": "Show the P2P token", "responses": { "200": { @@ -53,6 +657,9 @@ const docTemplate = `{ }, "/backend/monitor": { "get": { + "tags": [ + "monitoring" + ], "summary": "Backend monitor endpoint", "parameters": [ { @@ -77,7 +684,10 @@ const docTemplate = `{ }, "/backend/shutdown": { "post": { - "summary": "Backend monitor endpoint", + "tags": [ + "monitoring" + ], + "summary": "Backend shutdown endpoint", "parameters": [ { "description": "Backend statistics request", @@ -94,6 +704,9 @@ const docTemplate = `{ }, "/backends": { "get": { + "tags": [ + "backends" + ], "summary": "List all Backends", "responses": { "200": { @@ -110,6 +723,9 @@ const docTemplate = `{ }, "/backends/apply": { "post": { + "tags": [ + "backends" + ], "summary": "Install backends to LocalAI.", "parameters": [ { @@ -134,6 +750,9 @@ const docTemplate = `{ }, "/backends/available": { "get": { + "tags": [ + "backends" + ], "summary": "List all available Backends", "responses": { "200": { @@ -150,6 +769,9 @@ const docTemplate = `{ }, "/backends/delete/{name}": { "post": { + "tags": [ + 
"backends" + ], "summary": "delete backends from LocalAI.", "parameters": [ { @@ -172,6 +794,9 @@ const docTemplate = `{ }, "/backends/galleries": { "get": { + "tags": [ + "backends" + ], "summary": "List all Galleries", "responses": { "200": { @@ -188,6 +813,9 @@ const docTemplate = `{ }, "/backends/jobs": { "get": { + "tags": [ + "backends" + ], "summary": "Returns all the jobs status progress", "responses": { "200": { @@ -204,6 +832,9 @@ const docTemplate = `{ }, "/backends/jobs/{uuid}": { "get": { + "tags": [ + "backends" + ], "summary": "Returns the job status", "responses": { "200": { @@ -217,23 +848,28 @@ const docTemplate = `{ }, "/metrics": { "get": { + "produces": [ + "text/plain" + ], + "tags": [ + "monitoring" + ], "summary": "Prometheus metrics endpoint", - "parameters": [ - { - "description": "Gallery details", - "name": "request", - "in": "body", - "required": true, + "responses": { + "200": { + "description": "Prometheus metrics", "schema": { - "$ref": "#/definitions/config.Gallery" + "type": "string" } } - ], - "responses": {} + } } }, "/models/apply": { "post": { + "tags": [ + "models" + ], "summary": "Install models to LocalAI.", "parameters": [ { @@ -258,6 +894,9 @@ const docTemplate = `{ }, "/models/available": { "get": { + "tags": [ + "models" + ], "summary": "List installable models.", "responses": { "200": { @@ -265,7 +904,7 @@ const docTemplate = `{ "schema": { "type": "array", "items": { - "$ref": "#/definitions/gallery.GalleryModel" + "$ref": "#/definitions/gallery.Metadata" } } } @@ -274,6 +913,9 @@ const docTemplate = `{ }, "/models/delete/{name}": { "post": { + "tags": [ + "models" + ], "summary": "delete models to LocalAI.", "parameters": [ { @@ -296,6 +938,9 @@ const docTemplate = `{ }, "/models/galleries": { "get": { + "tags": [ + "models" + ], "summary": "List all Galleries", "responses": { "200": { @@ -312,6 +957,9 @@ const docTemplate = `{ }, "/models/jobs": { "get": { + "tags": [ + "models" + ], "summary": "Returns all the jobs 
status progress", "responses": { "200": { @@ -328,6 +976,9 @@ const docTemplate = `{ }, "/models/jobs/{uuid}": { "get": { + "tags": [ + "models" + ], "summary": "Returns the job status", "responses": { "200": { @@ -341,6 +992,9 @@ const docTemplate = `{ }, "/system": { "get": { + "tags": [ + "monitoring" + ], "summary": "Show the LocalAI instance information", "responses": { "200": { @@ -360,6 +1014,9 @@ const docTemplate = `{ "produces": [ "audio/x-wav" ], + "tags": [ + "tokenize" + ], "summary": "Get TokenMetrics for Active Slot.", "responses": { "200": { @@ -379,6 +1036,9 @@ const docTemplate = `{ "produces": [ "audio/x-wav" ], + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -409,6 +1069,9 @@ const docTemplate = `{ "produces": [ "audio/x-wav" ], + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -436,6 +1099,9 @@ const docTemplate = `{ "consumes": [ "multipart/form-data" ], + "tags": [ + "audio" + ], "summary": "Transcribes audio into the input language.", "parameters": [ { @@ -468,6 +1134,9 @@ const docTemplate = `{ }, "/v1/chat/completions": { "post": { + "tags": [ + "inference" + ], "summary": "Generate a chat completions for a given prompt and model.", "parameters": [ { @@ -492,6 +1161,9 @@ const docTemplate = `{ }, "/v1/completions": { "post": { + "tags": [ + "inference" + ], "summary": "Generate completions for a given prompt and model.", "parameters": [ { @@ -516,6 +1188,9 @@ const docTemplate = `{ }, "/v1/detection": { "post": { + "tags": [ + "detection" + ], "summary": "Detects objects in the input image.", "parameters": [ { @@ -540,6 +1215,9 @@ const docTemplate = `{ }, "/v1/edits": { "post": { + "tags": [ + "inference" + ], "summary": "OpenAI edit endpoint", "parameters": [ { @@ -564,6 +1242,9 @@ const docTemplate = `{ }, "/v1/embeddings": { "post": { + "tags": [ + "embeddings" + ], "summary": "Get a vector representation of a given input that can be 
easily consumed by machine learning models and algorithms.", "parameters": [ { @@ -588,6 +1269,9 @@ const docTemplate = `{ }, "/v1/images/generations": { "post": { + "tags": [ + "images" + ], "summary": "Creates an image given a prompt.", "parameters": [ { @@ -689,6 +1373,9 @@ const docTemplate = `{ }, "/v1/mcp/chat/completions": { "post": { + "tags": [ + "mcp" + ], "summary": "MCP chat completions with automatic tool execution", "parameters": [ { @@ -713,6 +1400,9 @@ const docTemplate = `{ }, "/v1/messages": { "post": { + "tags": [ + "inference" + ], "summary": "Generate a message response for the given messages and model.", "parameters": [ { @@ -737,6 +1427,9 @@ const docTemplate = `{ }, "/v1/models": { "get": { + "tags": [ + "models" + ], "summary": "List and describe the various models available in the API.", "responses": { "200": { @@ -750,6 +1443,9 @@ const docTemplate = `{ }, "/v1/rerank": { "post": { + "tags": [ + "rerank" + ], "summary": "Reranks a list of phrases by relevance to a given text query.", "parameters": [ { @@ -774,6 +1470,9 @@ const docTemplate = `{ }, "/v1/responses": { "post": { + "tags": [ + "inference" + ], "summary": "Create a response using the Open Responses API", "parameters": [ { @@ -799,6 +1498,9 @@ const docTemplate = `{ "/v1/responses/{id}": { "get": { "description": "Retrieve a response by ID. 
Can be used for polling background responses or resuming streaming responses.", + "tags": [ + "inference" + ], "summary": "Get a response by ID", "parameters": [ { @@ -848,6 +1550,9 @@ const docTemplate = `{ "/v1/responses/{id}/cancel": { "post": { "description": "Cancel a background response if it's still in progress", + "tags": [ + "inference" + ], "summary": "Cancel a response", "parameters": [ { @@ -884,6 +1589,9 @@ const docTemplate = `{ }, "/v1/sound-generation": { "post": { + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -908,6 +1616,9 @@ const docTemplate = `{ }, "/v1/text-to-speech/{voice-id}": { "post": { + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -945,6 +1656,9 @@ const docTemplate = `{ "produces": [ "audio/x-wav" ], + "tags": [ + "tokenize" + ], "summary": "Get TokenMetrics for Active Slot.", "responses": { "200": { @@ -958,6 +1672,9 @@ const docTemplate = `{ }, "/v1/tokenize": { "post": { + "tags": [ + "tokenize" + ], "summary": "Tokenize the input.", "parameters": [ { @@ -985,6 +1702,9 @@ const docTemplate = `{ "consumes": [ "application/json" ], + "tags": [ + "audio" + ], "summary": "Detect voice fragments in an audio stream", "parameters": [ { @@ -1009,6 +1729,9 @@ const docTemplate = `{ }, "/video": { "post": { + "tags": [ + "video" + ], "summary": "Creates a video given a prompt.", "parameters": [ { @@ -1197,7 +1920,74 @@ const docTemplate = `{ } } }, - "gallery.GalleryModel": { + "gallery.Metadata": { + "type": "object", + "properties": { + "backend": { + "description": "Backend is the resolved backend engine for this model (e.g. 
\"llama-cpp\").\nPopulated at load time from overrides, inline config, or the URL-referenced config file.", + "type": "string" + }, + "description": { + "type": "string" + }, + "files": { + "description": "AdditionalFiles are used to add additional files to the model", + "type": "array", + "items": { + "$ref": "#/definitions/gallery.File" + } + }, + "gallery": { + "description": "Gallery is a reference to the gallery which contains the model", + "allOf": [ + { + "$ref": "#/definitions/config.Gallery" + } + ] + }, + "icon": { + "type": "string" + }, + "installed": { + "description": "Installed is used to indicate if the model is installed or not", + "type": "boolean" + }, + "license": { + "type": "string" + }, + "name": { + "type": "string" + }, + "size": { + "description": "Size is an optional hardcoded model size string (e.g. \"500MB\", \"14.5GB\").\nUsed when the size cannot be estimated automatically.", + "type": "string" + }, + "tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "url": { + "type": "string" + }, + "urls": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "localai.GalleryBackend": { + "type": "object", + "properties": { + "id": { + "type": "string" + } + } + }, + "localai.GalleryModel": { "type": "object", "properties": { "backend": { @@ -1230,6 +2020,9 @@ const docTemplate = `{ "icon": { "type": "string" }, + "id": { + "type": "string" + }, "installed": { "description": "Installed is used to indicate if the model is installed or not", "type": "boolean" @@ -1266,83 +2059,137 @@ const docTemplate = `{ } } }, - "localai.GalleryBackend": { + "localai.vramEstimateRequest": { "type": "object", "properties": { - "id": { + "context_size": { + "description": "context length to estimate for (default 8192)", + "type": "integer" + }, + "gpu_layers": { + "description": "number of layers to offload to GPU (0 = all)", + "type": "integer" + }, + "kv_quant_bits": { + "description": "KV cache quantization bits (0 = 
fp16)", + "type": "integer" + }, + "model": { + "description": "model name (must be installed)", "type": "string" } } }, - "localai.GalleryModel": { + "localai.vramEstimateResponse": { "type": "object", "properties": { - "backend": { - "description": "Backend is the resolved backend engine for this model (e.g. \"llama-cpp\").\nPopulated at load time from overrides, inline config, or the URL-referenced config file.", + "context_note": { + "description": "note when context_size was defaulted", "type": "string" }, - "config_file": { - "description": "config_file is read in the situation where URL is blank - and therefore this is a base config.", - "type": "object", - "additionalProperties": true + "model_max_context": { + "description": "model's trained maximum context length", + "type": "integer" + }, + "sizeBytes": { + "description": "total model weight size in bytes", + "type": "integer" + }, + "sizeDisplay": { + "description": "human-readable size (e.g. \"4.2 GB\")", + "type": "string" + }, + "vrambytes": { + "description": "estimated VRAM usage in bytes", + "type": "integer" + }, + "vramdisplay": { + "description": "human-readable VRAM (e.g. 
\"6.1 GB\")", + "type": "string" + } + } + }, + "meta.FieldMeta": { + "type": "object", + "properties": { + "advanced": { + "type": "boolean" + }, + "autocomplete_provider": { + "description": "\"backends\", \"models:chat\", etc.", + "type": "string" }, + "component": { + "description": "\"input\", \"number\", \"toggle\", \"select\", \"slider\", etc.", + "type": "string" + }, + "default": {}, "description": { + "description": "help text", + "type": "string" + }, + "go_type": { + "description": "\"*int\", \"string\", \"[]string\"", "type": "string" }, - "files": { - "description": "AdditionalFiles are used to add additional files to the model", + "label": { + "description": "human-readable label", + "type": "string" + }, + "max": { + "type": "number" + }, + "min": { + "type": "number" + }, + "options": { "type": "array", "items": { - "$ref": "#/definitions/gallery.File" + "$ref": "#/definitions/meta.FieldOption" } }, - "gallery": { - "description": "Gallery is a reference to the gallery which contains the model", - "allOf": [ - { - "$ref": "#/definitions/config.Gallery" - } - ] + "order": { + "type": "integer" }, - "icon": { + "path": { + "description": "dot-path: \"context_size\", \"function.grammar.parallel_calls\"", "type": "string" }, - "id": { + "placeholder": { "type": "string" }, - "installed": { - "description": "Installed is used to indicate if the model is installed or not", + "pointer": { + "description": "true = nil means \"not set\"", "type": "boolean" }, - "license": { - "type": "string" - }, - "name": { + "section": { + "description": "\"general\", \"llm\", \"templates\", etc.", "type": "string" }, - "overrides": { - "description": "Overrides are used to override the configuration of the model located at URL", - "type": "object", - "additionalProperties": true + "step": { + "type": "number" }, - "size": { - "description": "Size is an optional hardcoded model size string (e.g. 
\"500MB\", \"14.5GB\").\nUsed when the size cannot be estimated automatically.", + "ui_type": { + "description": "\"string\", \"int\", \"float\", \"bool\", \"[]string\", \"map\", \"object\"", "type": "string" }, - "tags": { - "type": "array", - "items": { - "type": "string" - } + "vram_impact": { + "type": "boolean" }, - "url": { + "yaml_key": { + "description": "leaf yaml key", + "type": "string" + } + } + }, + "meta.FieldOption": { + "type": "object", + "properties": { + "label": { "type": "string" }, - "urls": { - "type": "array", - "items": { - "type": "string" - } + "value": { + "type": "string" } } }, @@ -1639,6 +2486,7 @@ const docTemplate = `{ "type": "object", "properties": { "image": { + "description": "URL or base64-encoded image to analyze", "type": "string" }, "model": { @@ -1848,6 +2696,205 @@ const docTemplate = `{ } } }, + "schema.Job": { + "type": "object", + "properties": { + "audios": { + "description": "List of audio URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "completed_at": { + "type": "string" + }, + "created_at": { + "type": "string" + }, + "error": { + "description": "Error message if failed", + "type": "string" + }, + "files": { + "description": "List of file URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "id": { + "description": "UUID", + "type": "string" + }, + "images": { + "description": "Multimedia content (for manual execution)\nCan contain URLs or base64-encoded data URIs", + "type": "array", + "items": { + "type": "string" + } + }, + "parameters": { + "description": "Template parameters", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "result": { + "description": "Agent response", + "type": "string" + }, + "started_at": { + "type": "string" + }, + "status": { + "description": "pending, running, completed, failed, cancelled", + "allOf": [ + { + "$ref": "#/definitions/schema.JobStatus" + } + ] + }, + "task_id": { + 
"description": "Reference to Task", + "type": "string" + }, + "traces": { + "description": "Execution traces (reasoning, tool calls, tool results)", + "type": "array", + "items": { + "$ref": "#/definitions/schema.JobTrace" + } + }, + "triggered_by": { + "description": "\"manual\", \"cron\", \"api\"", + "type": "string" + }, + "videos": { + "description": "List of video URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "webhook_error": { + "description": "Error if webhook failed", + "type": "string" + }, + "webhook_sent": { + "description": "Webhook delivery tracking", + "type": "boolean" + }, + "webhook_sent_at": { + "type": "string" + } + } + }, + "schema.JobExecutionRequest": { + "type": "object", + "properties": { + "audios": { + "description": "List of audio URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "files": { + "description": "List of file URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "images": { + "description": "Multimedia content (optional, for manual execution)\nCan contain URLs or base64-encoded data URIs", + "type": "array", + "items": { + "type": "string" + } + }, + "parameters": { + "description": "Optional, for templating", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "task_id": { + "description": "Required", + "type": "string" + }, + "videos": { + "description": "List of video URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "schema.JobExecutionResponse": { + "type": "object", + "properties": { + "job_id": { + "description": "unique job identifier", + "type": "string" + }, + "status": { + "description": "initial status (pending)", + "type": "string" + }, + "url": { + "description": "URL to poll for job status", + "type": "string" + } + } + }, + "schema.JobStatus": { + "type": "string", + "enum": [ + "pending", + "running", + "completed", + "failed", + 
"cancelled" + ], + "x-enum-varnames": [ + "JobStatusPending", + "JobStatusRunning", + "JobStatusCompleted", + "JobStatusFailed", + "JobStatusCancelled" + ] + }, + "schema.JobTrace": { + "type": "object", + "properties": { + "arguments": { + "description": "Tool arguments or result data", + "type": "object", + "additionalProperties": true + }, + "content": { + "description": "The actual trace content", + "type": "string" + }, + "timestamp": { + "description": "When this trace occurred", + "type": "string" + }, + "tool_name": { + "description": "Tool name (for tool_call/tool_result)", + "type": "string" + }, + "type": { + "description": "\"reasoning\", \"tool_call\", \"tool_result\", \"status\"", + "type": "string" + } + } + }, "schema.LogprobContent": { "type": "object", "properties": { @@ -1961,6 +3008,26 @@ const docTemplate = `{ } } }, + "schema.MultimediaSourceConfig": { + "type": "object", + "properties": { + "headers": { + "description": "Custom headers for HTTP request (e.g., Authorization)", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "type": { + "description": "\"image\", \"video\", \"audio\", \"file\"", + "type": "string" + }, + "url": { + "description": "URL to fetch from", + "type": "string" + } + } + }, "schema.NodeData": { "type": "object", "properties": { @@ -2835,12 +3902,14 @@ const docTemplate = `{ "type": "object", "properties": { "backends": { + "description": "available backend engines", "type": "array", "items": { "type": "string" } }, "loaded_models": { + "description": "currently loaded models", "type": "array", "items": { "$ref": "#/definitions/schema.SysInfoModel" @@ -2853,6 +3922,7 @@ const docTemplate = `{ "type": "object", "properties": { "backend": { + "description": "backend engine override", "type": "string" }, "input": { @@ -2884,10 +3954,71 @@ const docTemplate = `{ } } }, + "schema.Task": { + "type": "object", + "properties": { + "created_at": { + "type": "string" + }, + "cron": { + "description": 
"Optional cron expression", + "type": "string" + }, + "cron_parameters": { + "description": "Parameters to use when executing cron jobs", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "description": { + "description": "Optional description", + "type": "string" + }, + "enabled": { + "description": "Can be disabled without deletion", + "type": "boolean" + }, + "id": { + "description": "UUID", + "type": "string" + }, + "model": { + "description": "Model name (must have MCP config)", + "type": "string" + }, + "multimedia_sources": { + "description": "Multimedia sources (for cron jobs)\nURLs to fetch multimedia content from when cron job executes\nEach source can have custom headers for authentication/authorization", + "type": "array", + "items": { + "$ref": "#/definitions/schema.MultimediaSourceConfig" + } + }, + "name": { + "description": "User-friendly name", + "type": "string" + }, + "prompt": { + "description": "Template prompt (supports {{.param}} syntax)", + "type": "string" + }, + "updated_at": { + "type": "string" + }, + "webhooks": { + "description": "Webhook configuration (for notifications)\nSupport multiple webhook endpoints\nWebhooks can handle both success and failure cases using template variables:\n- {{.Job}} - Job object with all fields\n- {{.Task}} - Task object\n- {{.Result}} - Job result (if successful)\n- {{.Error}} - Error message (if failed, empty string if successful)\n- {{.Status}} - Job status string", + "type": "array", + "items": { + "$ref": "#/definitions/schema.WebhookConfig" + } + } + } + }, "schema.TokenizeRequest": { "type": "object", "properties": { "content": { + "description": "text to tokenize", "type": "string" }, "model": { @@ -2899,6 +4030,7 @@ const docTemplate = `{ "type": "object", "properties": { "tokens": { + "description": "token IDs", "type": "array", "items": { "type": "integer" @@ -2928,7 +4060,7 @@ const docTemplate = `{ "type": "object", "properties": { "audio": { - "description": "model 
name or full path", + "description": "raw audio samples as float32 PCM", "type": "array", "items": { "type": "number" @@ -2943,55 +4075,94 @@ const docTemplate = `{ "type": "object", "properties": { "cfg_scale": { + "description": "classifier-free guidance scale", "type": "number" }, "end_image": { + "description": "URL or base64 of the last frame", "type": "string" }, "fps": { + "description": "frames per second", "type": "integer" }, "height": { + "description": "output height in pixels", "type": "integer" }, "input_reference": { + "description": "reference image or video URL", "type": "string" }, "model": { "type": "string" }, "negative_prompt": { + "description": "things to avoid in the output", "type": "string" }, "num_frames": { + "description": "total number of frames to generate", "type": "integer" }, "prompt": { + "description": "text description of the video to generate", "type": "string" }, "response_format": { + "description": "output format (url or b64_json)", "type": "string" }, "seconds": { + "description": "duration in seconds (alternative to num_frames)", "type": "string" }, "seed": { + "description": "random seed for reproducibility", "type": "integer" }, "size": { + "description": "WxH shorthand (e.g. 
\"512x512\")", "type": "string" }, "start_image": { + "description": "URL or base64 of the first frame", "type": "string" }, "step": { + "description": "number of diffusion steps", "type": "integer" }, "width": { + "description": "output width in pixels", "type": "integer" } } }, + "schema.WebhookConfig": { + "type": "object", + "properties": { + "headers": { + "description": "Custom headers (e.g., Authorization)", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "method": { + "description": "HTTP method (POST, PUT, PATCH) - default: POST", + "type": "string" + }, + "payload_template": { + "description": "Optional template for payload", + "type": "string" + }, + "url": { + "description": "Webhook endpoint URL", + "type": "string" + } + } + }, "services.GalleryOpStatus": { "type": "object", "properties": { diff --git a/swagger/embed.go b/swagger/embed.go new file mode 100644 index 000000000000..167d1049658d --- /dev/null +++ b/swagger/embed.go @@ -0,0 +1,6 @@ +package swagger + +import _ "embed" + +//go:embed swagger.json +var SwaggerJSON []byte diff --git a/swagger/swagger.json b/swagger/swagger.json index ad759ffcb8b6..0fea89317527 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -15,8 +15,609 @@ }, "basePath": "/", "paths": { + "/api/agent/jobs": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "List agent jobs", + "parameters": [ + { + "type": "string", + "description": "Filter by task ID", + "name": "task_id", + "in": "query" + }, + { + "type": "string", + "description": "Filter by status (pending, running, completed, failed, cancelled)", + "name": "status", + "in": "query" + }, + { + "type": "integer", + "description": "Max number of jobs to return", + "name": "limit", + "in": "query" + }, + { + "type": "string", + "description": "Set to 'true' for admin cross-user listing", + "name": "all_users", + "in": "query" + } + ], + "responses": { + "200": { + "description": 
"jobs", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Job" + } + } + } + } + } + }, + "/api/agent/jobs/execute": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Execute an agent job", + "parameters": [ + { + "description": "Job execution request", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.JobExecutionRequest" + } + } + ], + "responses": { + "201": { + "description": "job created", + "schema": { + "$ref": "#/definitions/schema.JobExecutionResponse" + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/jobs/{id}": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Get an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "job", + "schema": { + "$ref": "#/definitions/schema.Job" + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "delete": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Delete an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/jobs/{id}/cancel": { + "post": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": 
"Cancel an agent job", + "parameters": [ + { + "type": "string", + "description": "Job ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "List agent tasks", + "parameters": [ + { + "type": "string", + "description": "Set to 'true' for admin cross-user listing", + "name": "all_users", + "in": "query" + } + ], + "responses": { + "200": { + "description": "tasks", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Task" + } + } + } + } + }, + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Create a new agent task", + "parameters": [ + { + "description": "Task definition", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.Task" + } + } + ], + "responses": { + "201": { + "description": "id", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks/{id}": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Get an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "task", + 
"schema": { + "$ref": "#/definitions/schema.Task" + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "put": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Update an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + }, + { + "description": "Updated task definition", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.Task" + } + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + }, + "delete": { + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Delete an agent task", + "parameters": [ + { + "type": "string", + "description": "Task ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "message", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/agent/tasks/{name}/execute": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "agent-jobs" + ], + "summary": "Execute an agent task by name", + "parameters": [ + { + "type": "string", + "description": "Task name", + "name": "name", + "in": "path", + "required": true + }, + { + "description": "Optional 
template parameters", + "name": "parameters", + "in": "body", + "schema": { + "type": "object" + } + } + ], + "responses": { + "201": { + "description": "job created", + "schema": { + "$ref": "#/definitions/schema.JobExecutionResponse" + } + }, + "400": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "404": { + "description": "error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/api/models/config-json/{name}": { + "patch": { + "description": "Deep-merges the JSON patch body into the existing model config", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Partially update a model configuration", + "parameters": [ + { + "type": "string", + "description": "Model name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "success message", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + } + }, + "/api/models/config-metadata": { + "get": { + "description": "Returns ~170 config fields with types, UI hints, sections, and options", + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "List all model configuration field metadata", + "responses": { + "200": { + "description": "List of field metadata", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/meta.FieldMeta" + } + } + } + } + } + }, + "/api/models/config-metadata/autocomplete/{provider}": { + "get": { + "description": "Returns runtime-resolved values for dynamic providers (backends, models)", + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Get dynamic autocomplete values for a config field", + "parameters": [ + { + "type": "string", + "description": "Provider name (backends, models, models:chat, models:tts, models:transcript, 
models:vad)", + "name": "provider", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "values array", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + } + }, + "/api/models/vram-estimate": { + "post": { + "description": "Estimates VRAM based on model weight files, context size, and GPU layers", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "config" + ], + "summary": "Estimate VRAM usage for a model", + "parameters": [ + { + "description": "VRAM estimation parameters", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/localai.vramEstimateRequest" + } + } + ], + "responses": { + "200": { + "description": "VRAM estimate", + "schema": { + "$ref": "#/definitions/localai.vramEstimateResponse" + } + } + } + } + }, "/api/p2p": { "get": { + "tags": [ + "p2p" + ], "summary": "Returns available P2P nodes", "responses": { "200": { @@ -33,6 +634,9 @@ }, "/api/p2p/token": { "get": { + "tags": [ + "p2p" + ], "summary": "Show the P2P token", "responses": { "200": { @@ -46,6 +650,9 @@ }, "/backend/monitor": { "get": { + "tags": [ + "monitoring" + ], "summary": "Backend monitor endpoint", "parameters": [ { @@ -70,7 +677,10 @@ }, "/backend/shutdown": { "post": { - "summary": "Backend monitor endpoint", + "tags": [ + "monitoring" + ], + "summary": "Backend shutdown endpoint", "parameters": [ { "description": "Backend statistics request", @@ -87,6 +697,9 @@ }, "/backends": { "get": { + "tags": [ + "backends" + ], "summary": "List all Backends", "responses": { "200": { @@ -103,6 +716,9 @@ }, "/backends/apply": { "post": { + "tags": [ + "backends" + ], "summary": "Install backends to LocalAI.", "parameters": [ { @@ -127,6 +743,9 @@ }, "/backends/available": { "get": { + "tags": [ + "backends" + ], "summary": "List all available Backends", "responses": { "200": { @@ -143,6 +762,9 @@ }, "/backends/delete/{name}": { "post": { 
+ "tags": [ + "backends" + ], "summary": "delete backends from LocalAI.", "parameters": [ { @@ -165,6 +787,9 @@ }, "/backends/galleries": { "get": { + "tags": [ + "backends" + ], "summary": "List all Galleries", "responses": { "200": { @@ -181,6 +806,9 @@ }, "/backends/jobs": { "get": { + "tags": [ + "backends" + ], "summary": "Returns all the jobs status progress", "responses": { "200": { @@ -197,6 +825,9 @@ }, "/backends/jobs/{uuid}": { "get": { + "tags": [ + "backends" + ], "summary": "Returns the job status", "responses": { "200": { @@ -210,23 +841,28 @@ }, "/metrics": { "get": { + "produces": [ + "text/plain" + ], + "tags": [ + "monitoring" + ], "summary": "Prometheus metrics endpoint", - "parameters": [ - { - "description": "Gallery details", - "name": "request", - "in": "body", - "required": true, + "responses": { + "200": { + "description": "Prometheus metrics", "schema": { - "$ref": "#/definitions/config.Gallery" + "type": "string" } } - ], - "responses": {} + } } }, "/models/apply": { "post": { + "tags": [ + "models" + ], "summary": "Install models to LocalAI.", "parameters": [ { @@ -251,6 +887,9 @@ }, "/models/available": { "get": { + "tags": [ + "models" + ], "summary": "List installable models.", "responses": { "200": { @@ -258,7 +897,7 @@ "schema": { "type": "array", "items": { - "$ref": "#/definitions/gallery.GalleryModel" + "$ref": "#/definitions/gallery.Metadata" } } } @@ -267,6 +906,9 @@ }, "/models/delete/{name}": { "post": { + "tags": [ + "models" + ], "summary": "delete models to LocalAI.", "parameters": [ { @@ -289,6 +931,9 @@ }, "/models/galleries": { "get": { + "tags": [ + "models" + ], "summary": "List all Galleries", "responses": { "200": { @@ -305,6 +950,9 @@ }, "/models/jobs": { "get": { + "tags": [ + "models" + ], "summary": "Returns all the jobs status progress", "responses": { "200": { @@ -321,6 +969,9 @@ }, "/models/jobs/{uuid}": { "get": { + "tags": [ + "models" + ], "summary": "Returns the job status", "responses": { "200": { @@ 
-334,6 +985,9 @@ }, "/system": { "get": { + "tags": [ + "monitoring" + ], "summary": "Show the LocalAI instance information", "responses": { "200": { @@ -353,6 +1007,9 @@ "produces": [ "audio/x-wav" ], + "tags": [ + "tokenize" + ], "summary": "Get TokenMetrics for Active Slot.", "responses": { "200": { @@ -372,6 +1029,9 @@ "produces": [ "audio/x-wav" ], + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -402,6 +1062,9 @@ "produces": [ "audio/x-wav" ], + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -429,6 +1092,9 @@ "consumes": [ "multipart/form-data" ], + "tags": [ + "audio" + ], "summary": "Transcribes audio into the input language.", "parameters": [ { @@ -461,6 +1127,9 @@ }, "/v1/chat/completions": { "post": { + "tags": [ + "inference" + ], "summary": "Generate a chat completions for a given prompt and model.", "parameters": [ { @@ -485,6 +1154,9 @@ }, "/v1/completions": { "post": { + "tags": [ + "inference" + ], "summary": "Generate completions for a given prompt and model.", "parameters": [ { @@ -509,6 +1181,9 @@ }, "/v1/detection": { "post": { + "tags": [ + "detection" + ], "summary": "Detects objects in the input image.", "parameters": [ { @@ -533,6 +1208,9 @@ }, "/v1/edits": { "post": { + "tags": [ + "inference" + ], "summary": "OpenAI edit endpoint", "parameters": [ { @@ -557,6 +1235,9 @@ }, "/v1/embeddings": { "post": { + "tags": [ + "embeddings" + ], "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.", "parameters": [ { @@ -581,6 +1262,9 @@ }, "/v1/images/generations": { "post": { + "tags": [ + "images" + ], "summary": "Creates an image given a prompt.", "parameters": [ { @@ -682,6 +1366,9 @@ }, "/v1/mcp/chat/completions": { "post": { + "tags": [ + "mcp" + ], "summary": "MCP chat completions with automatic tool execution", "parameters": [ { @@ -706,6 +1393,9 @@ }, "/v1/messages": { 
"post": { + "tags": [ + "inference" + ], "summary": "Generate a message response for the given messages and model.", "parameters": [ { @@ -730,6 +1420,9 @@ }, "/v1/models": { "get": { + "tags": [ + "models" + ], "summary": "List and describe the various models available in the API.", "responses": { "200": { @@ -743,6 +1436,9 @@ }, "/v1/rerank": { "post": { + "tags": [ + "rerank" + ], "summary": "Reranks a list of phrases by relevance to a given text query.", "parameters": [ { @@ -767,6 +1463,9 @@ }, "/v1/responses": { "post": { + "tags": [ + "inference" + ], "summary": "Create a response using the Open Responses API", "parameters": [ { @@ -792,6 +1491,9 @@ "/v1/responses/{id}": { "get": { "description": "Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.", + "tags": [ + "inference" + ], "summary": "Get a response by ID", "parameters": [ { @@ -841,6 +1543,9 @@ "/v1/responses/{id}/cancel": { "post": { "description": "Cancel a background response if it's still in progress", + "tags": [ + "inference" + ], "summary": "Cancel a response", "parameters": [ { @@ -877,6 +1582,9 @@ }, "/v1/sound-generation": { "post": { + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -901,6 +1609,9 @@ }, "/v1/text-to-speech/{voice-id}": { "post": { + "tags": [ + "audio" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -938,6 +1649,9 @@ "produces": [ "audio/x-wav" ], + "tags": [ + "tokenize" + ], "summary": "Get TokenMetrics for Active Slot.", "responses": { "200": { @@ -951,6 +1665,9 @@ }, "/v1/tokenize": { "post": { + "tags": [ + "tokenize" + ], "summary": "Tokenize the input.", "parameters": [ { @@ -978,6 +1695,9 @@ "consumes": [ "application/json" ], + "tags": [ + "audio" + ], "summary": "Detect voice fragments in an audio stream", "parameters": [ { @@ -1002,6 +1722,9 @@ }, "/video": { "post": { + "tags": [ + "video" + ], "summary": "Creates a video given a 
prompt.", "parameters": [ { @@ -1190,7 +1913,74 @@ } } }, - "gallery.GalleryModel": { + "gallery.Metadata": { + "type": "object", + "properties": { + "backend": { + "description": "Backend is the resolved backend engine for this model (e.g. \"llama-cpp\").\nPopulated at load time from overrides, inline config, or the URL-referenced config file.", + "type": "string" + }, + "description": { + "type": "string" + }, + "files": { + "description": "AdditionalFiles are used to add additional files to the model", + "type": "array", + "items": { + "$ref": "#/definitions/gallery.File" + } + }, + "gallery": { + "description": "Gallery is a reference to the gallery which contains the model", + "allOf": [ + { + "$ref": "#/definitions/config.Gallery" + } + ] + }, + "icon": { + "type": "string" + }, + "installed": { + "description": "Installed is used to indicate if the model is installed or not", + "type": "boolean" + }, + "license": { + "type": "string" + }, + "name": { + "type": "string" + }, + "size": { + "description": "Size is an optional hardcoded model size string (e.g. 
\"500MB\", \"14.5GB\").\nUsed when the size cannot be estimated automatically.", + "type": "string" + }, + "tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "url": { + "type": "string" + }, + "urls": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "localai.GalleryBackend": { + "type": "object", + "properties": { + "id": { + "type": "string" + } + } + }, + "localai.GalleryModel": { "type": "object", "properties": { "backend": { @@ -1223,6 +2013,9 @@ "icon": { "type": "string" }, + "id": { + "type": "string" + }, "installed": { "description": "Installed is used to indicate if the model is installed or not", "type": "boolean" @@ -1259,83 +2052,137 @@ } } }, - "localai.GalleryBackend": { + "localai.vramEstimateRequest": { "type": "object", "properties": { - "id": { + "context_size": { + "description": "context length to estimate for (default 8192)", + "type": "integer" + }, + "gpu_layers": { + "description": "number of layers to offload to GPU (0 = all)", + "type": "integer" + }, + "kv_quant_bits": { + "description": "KV cache quantization bits (0 = fp16)", + "type": "integer" + }, + "model": { + "description": "model name (must be installed)", "type": "string" } } }, - "localai.GalleryModel": { + "localai.vramEstimateResponse": { "type": "object", "properties": { - "backend": { - "description": "Backend is the resolved backend engine for this model (e.g. 
\"llama-cpp\").\nPopulated at load time from overrides, inline config, or the URL-referenced config file.", + "context_note": { + "description": "note when context_size was defaulted", "type": "string" }, - "config_file": { - "description": "config_file is read in the situation where URL is blank - and therefore this is a base config.", - "type": "object", - "additionalProperties": true + "model_max_context": { + "description": "model's trained maximum context length", + "type": "integer" + }, + "sizeBytes": { + "description": "total model weight size in bytes", + "type": "integer" + }, + "sizeDisplay": { + "description": "human-readable size (e.g. \"4.2 GB\")", + "type": "string" + }, + "vrambytes": { + "description": "estimated VRAM usage in bytes", + "type": "integer" + }, + "vramdisplay": { + "description": "human-readable VRAM (e.g. \"6.1 GB\")", + "type": "string" + } + } + }, + "meta.FieldMeta": { + "type": "object", + "properties": { + "advanced": { + "type": "boolean" + }, + "autocomplete_provider": { + "description": "\"backends\", \"models:chat\", etc.", + "type": "string" }, + "component": { + "description": "\"input\", \"number\", \"toggle\", \"select\", \"slider\", etc.", + "type": "string" + }, + "default": {}, "description": { + "description": "help text", + "type": "string" + }, + "go_type": { + "description": "\"*int\", \"string\", \"[]string\"", "type": "string" }, - "files": { - "description": "AdditionalFiles are used to add additional files to the model", + "label": { + "description": "human-readable label", + "type": "string" + }, + "max": { + "type": "number" + }, + "min": { + "type": "number" + }, + "options": { "type": "array", "items": { - "$ref": "#/definitions/gallery.File" + "$ref": "#/definitions/meta.FieldOption" } }, - "gallery": { - "description": "Gallery is a reference to the gallery which contains the model", - "allOf": [ - { - "$ref": "#/definitions/config.Gallery" - } - ] + "order": { + "type": "integer" }, - "icon": { + 
"path": { + "description": "dot-path: \"context_size\", \"function.grammar.parallel_calls\"", "type": "string" }, - "id": { + "placeholder": { "type": "string" }, - "installed": { - "description": "Installed is used to indicate if the model is installed or not", + "pointer": { + "description": "true = nil means \"not set\"", "type": "boolean" }, - "license": { - "type": "string" - }, - "name": { + "section": { + "description": "\"general\", \"llm\", \"templates\", etc.", "type": "string" }, - "overrides": { - "description": "Overrides are used to override the configuration of the model located at URL", - "type": "object", - "additionalProperties": true + "step": { + "type": "number" }, - "size": { - "description": "Size is an optional hardcoded model size string (e.g. \"500MB\", \"14.5GB\").\nUsed when the size cannot be estimated automatically.", + "ui_type": { + "description": "\"string\", \"int\", \"float\", \"bool\", \"[]string\", \"map\", \"object\"", "type": "string" }, - "tags": { - "type": "array", - "items": { - "type": "string" - } + "vram_impact": { + "type": "boolean" }, - "url": { + "yaml_key": { + "description": "leaf yaml key", + "type": "string" + } + } + }, + "meta.FieldOption": { + "type": "object", + "properties": { + "label": { "type": "string" }, - "urls": { - "type": "array", - "items": { - "type": "string" - } + "value": { + "type": "string" } } }, @@ -1632,6 +2479,7 @@ "type": "object", "properties": { "image": { + "description": "URL or base64-encoded image to analyze", "type": "string" }, "model": { @@ -1841,6 +2689,205 @@ } } }, + "schema.Job": { + "type": "object", + "properties": { + "audios": { + "description": "List of audio URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "completed_at": { + "type": "string" + }, + "created_at": { + "type": "string" + }, + "error": { + "description": "Error message if failed", + "type": "string" + }, + "files": { + "description": "List of file URLs or base64 
strings", + "type": "array", + "items": { + "type": "string" + } + }, + "id": { + "description": "UUID", + "type": "string" + }, + "images": { + "description": "Multimedia content (for manual execution)\nCan contain URLs or base64-encoded data URIs", + "type": "array", + "items": { + "type": "string" + } + }, + "parameters": { + "description": "Template parameters", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "result": { + "description": "Agent response", + "type": "string" + }, + "started_at": { + "type": "string" + }, + "status": { + "description": "pending, running, completed, failed, cancelled", + "allOf": [ + { + "$ref": "#/definitions/schema.JobStatus" + } + ] + }, + "task_id": { + "description": "Reference to Task", + "type": "string" + }, + "traces": { + "description": "Execution traces (reasoning, tool calls, tool results)", + "type": "array", + "items": { + "$ref": "#/definitions/schema.JobTrace" + } + }, + "triggered_by": { + "description": "\"manual\", \"cron\", \"api\"", + "type": "string" + }, + "videos": { + "description": "List of video URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "webhook_error": { + "description": "Error if webhook failed", + "type": "string" + }, + "webhook_sent": { + "description": "Webhook delivery tracking", + "type": "boolean" + }, + "webhook_sent_at": { + "type": "string" + } + } + }, + "schema.JobExecutionRequest": { + "type": "object", + "properties": { + "audios": { + "description": "List of audio URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "files": { + "description": "List of file URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + }, + "images": { + "description": "Multimedia content (optional, for manual execution)\nCan contain URLs or base64-encoded data URIs", + "type": "array", + "items": { + "type": "string" + } + }, + "parameters": { + "description": "Optional, for 
templating", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "task_id": { + "description": "Required", + "type": "string" + }, + "videos": { + "description": "List of video URLs or base64 strings", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "schema.JobExecutionResponse": { + "type": "object", + "properties": { + "job_id": { + "description": "unique job identifier", + "type": "string" + }, + "status": { + "description": "initial status (pending)", + "type": "string" + }, + "url": { + "description": "URL to poll for job status", + "type": "string" + } + } + }, + "schema.JobStatus": { + "type": "string", + "enum": [ + "pending", + "running", + "completed", + "failed", + "cancelled" + ], + "x-enum-varnames": [ + "JobStatusPending", + "JobStatusRunning", + "JobStatusCompleted", + "JobStatusFailed", + "JobStatusCancelled" + ] + }, + "schema.JobTrace": { + "type": "object", + "properties": { + "arguments": { + "description": "Tool arguments or result data", + "type": "object", + "additionalProperties": true + }, + "content": { + "description": "The actual trace content", + "type": "string" + }, + "timestamp": { + "description": "When this trace occurred", + "type": "string" + }, + "tool_name": { + "description": "Tool name (for tool_call/tool_result)", + "type": "string" + }, + "type": { + "description": "\"reasoning\", \"tool_call\", \"tool_result\", \"status\"", + "type": "string" + } + } + }, "schema.LogprobContent": { "type": "object", "properties": { @@ -1954,6 +3001,26 @@ } } }, + "schema.MultimediaSourceConfig": { + "type": "object", + "properties": { + "headers": { + "description": "Custom headers for HTTP request (e.g., Authorization)", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "type": { + "description": "\"image\", \"video\", \"audio\", \"file\"", + "type": "string" + }, + "url": { + "description": "URL to fetch from", + "type": "string" + } + } + }, "schema.NodeData": { 
"type": "object", "properties": { @@ -2828,12 +3895,14 @@ "type": "object", "properties": { "backends": { + "description": "available backend engines", "type": "array", "items": { "type": "string" } }, "loaded_models": { + "description": "currently loaded models", "type": "array", "items": { "$ref": "#/definitions/schema.SysInfoModel" @@ -2846,6 +3915,7 @@ "type": "object", "properties": { "backend": { + "description": "backend engine override", "type": "string" }, "input": { @@ -2877,10 +3947,71 @@ } } }, + "schema.Task": { + "type": "object", + "properties": { + "created_at": { + "type": "string" + }, + "cron": { + "description": "Optional cron expression", + "type": "string" + }, + "cron_parameters": { + "description": "Parameters to use when executing cron jobs", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "description": { + "description": "Optional description", + "type": "string" + }, + "enabled": { + "description": "Can be disabled without deletion", + "type": "boolean" + }, + "id": { + "description": "UUID", + "type": "string" + }, + "model": { + "description": "Model name (must have MCP config)", + "type": "string" + }, + "multimedia_sources": { + "description": "Multimedia sources (for cron jobs)\nURLs to fetch multimedia content from when cron job executes\nEach source can have custom headers for authentication/authorization", + "type": "array", + "items": { + "$ref": "#/definitions/schema.MultimediaSourceConfig" + } + }, + "name": { + "description": "User-friendly name", + "type": "string" + }, + "prompt": { + "description": "Template prompt (supports {{.param}} syntax)", + "type": "string" + }, + "updated_at": { + "type": "string" + }, + "webhooks": { + "description": "Webhook configuration (for notifications)\nSupport multiple webhook endpoints\nWebhooks can handle both success and failure cases using template variables:\n- {{.Job}} - Job object with all fields\n- {{.Task}} - Task object\n- {{.Result}} - Job result 
(if successful)\n- {{.Error}} - Error message (if failed, empty string if successful)\n- {{.Status}} - Job status string", + "type": "array", + "items": { + "$ref": "#/definitions/schema.WebhookConfig" + } + } + } + }, "schema.TokenizeRequest": { "type": "object", "properties": { "content": { + "description": "text to tokenize", "type": "string" }, "model": { @@ -2892,6 +4023,7 @@ "type": "object", "properties": { "tokens": { + "description": "token IDs", "type": "array", "items": { "type": "integer" @@ -2921,7 +4053,7 @@ "type": "object", "properties": { "audio": { - "description": "model name or full path", + "description": "raw audio samples as float32 PCM", "type": "array", "items": { "type": "number" @@ -2936,55 +4068,94 @@ "type": "object", "properties": { "cfg_scale": { + "description": "classifier-free guidance scale", "type": "number" }, "end_image": { + "description": "URL or base64 of the last frame", "type": "string" }, "fps": { + "description": "frames per second", "type": "integer" }, "height": { + "description": "output height in pixels", "type": "integer" }, "input_reference": { + "description": "reference image or video URL", "type": "string" }, "model": { "type": "string" }, "negative_prompt": { + "description": "things to avoid in the output", "type": "string" }, "num_frames": { + "description": "total number of frames to generate", "type": "integer" }, "prompt": { + "description": "text description of the video to generate", "type": "string" }, "response_format": { + "description": "output format (url or b64_json)", "type": "string" }, "seconds": { + "description": "duration in seconds (alternative to num_frames)", "type": "string" }, "seed": { + "description": "random seed for reproducibility", "type": "integer" }, "size": { + "description": "WxH shorthand (e.g. 
\"512x512\")", "type": "string" }, "start_image": { + "description": "URL or base64 of the first frame", "type": "string" }, "step": { + "description": "number of diffusion steps", "type": "integer" }, "width": { + "description": "output width in pixels", "type": "integer" } } }, + "schema.WebhookConfig": { + "type": "object", + "properties": { + "headers": { + "description": "Custom headers (e.g., Authorization)", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "method": { + "description": "HTTP method (POST, PUT, PATCH) - default: POST", + "type": "string" + }, + "payload_template": { + "description": "Optional template for payload", + "type": "string" + }, + "url": { + "description": "Webhook endpoint URL", + "type": "string" + } + } + }, "services.GalleryOpStatus": { "type": "object", "properties": { diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 974b5b0b4ac5..c327710c87ad 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -112,18 +112,13 @@ definitions: type: string type: array type: object - gallery.GalleryModel: + gallery.Metadata: properties: backend: description: |- Backend is the resolved backend engine for this model (e.g. "llama-cpp"). Populated at load time from overrides, inline config, or the URL-referenced config file. type: string - config_file: - additionalProperties: true - description: config_file is read in the situation where URL is blank - and - therefore this is a base config. - type: object description: type: string files: @@ -144,11 +139,6 @@ definitions: type: string name: type: string - overrides: - additionalProperties: true - description: Overrides are used to override the configuration of the model - located at URL - type: object size: description: |- Size is an optional hardcoded model size string (e.g. "500MB", "14.5GB"). 
@@ -225,6 +215,101 @@ definitions: type: string type: array type: object + localai.vramEstimateRequest: + properties: + context_size: + description: context length to estimate for (default 8192) + type: integer + gpu_layers: + description: number of layers to offload to GPU (0 = all) + type: integer + kv_quant_bits: + description: KV cache quantization bits (0 = fp16) + type: integer + model: + description: model name (must be installed) + type: string + type: object + localai.vramEstimateResponse: + properties: + context_note: + description: note when context_size was defaulted + type: string + model_max_context: + description: model's trained maximum context length + type: integer + sizeBytes: + description: total model weight size in bytes + type: integer + sizeDisplay: + description: human-readable size (e.g. "4.2 GB") + type: string + vrambytes: + description: estimated VRAM usage in bytes + type: integer + vramdisplay: + description: human-readable VRAM (e.g. "6.1 GB") + type: string + type: object + meta.FieldMeta: + properties: + advanced: + type: boolean + autocomplete_provider: + description: '"backends", "models:chat", etc.' + type: string + component: + description: '"input", "number", "toggle", "select", "slider", etc.' + type: string + default: {} + description: + description: help text + type: string + go_type: + description: '"*int", "string", "[]string"' + type: string + label: + description: human-readable label + type: string + max: + type: number + min: + type: number + options: + items: + $ref: '#/definitions/meta.FieldOption' + type: array + order: + type: integer + path: + description: 'dot-path: "context_size", "function.grammar.parallel_calls"' + type: string + placeholder: + type: string + pointer: + description: true = nil means "not set" + type: boolean + section: + description: '"general", "llm", "templates", etc.' 
+ type: string + step: + type: number + ui_type: + description: '"string", "int", "float", "bool", "[]string", "map", "object"' + type: string + vram_impact: + type: boolean + yaml_key: + description: leaf yaml key + type: string + type: object + meta.FieldOption: + properties: + label: + type: string + value: + type: string + type: object proto.MemoryUsageData: properties: breakdown: @@ -420,6 +505,7 @@ definitions: schema.DetectionRequest: properties: image: + description: URL or base64-encoded image to analyze type: string model: type: string @@ -557,6 +643,152 @@ definitions: total_tokens: type: integer type: object + schema.Job: + properties: + audios: + description: List of audio URLs or base64 strings + items: + type: string + type: array + completed_at: + type: string + created_at: + type: string + error: + description: Error message if failed + type: string + files: + description: List of file URLs or base64 strings + items: + type: string + type: array + id: + description: UUID + type: string + images: + description: |- + Multimedia content (for manual execution) + Can contain URLs or base64-encoded data URIs + items: + type: string + type: array + parameters: + additionalProperties: + type: string + description: Template parameters + type: object + result: + description: Agent response + type: string + started_at: + type: string + status: + allOf: + - $ref: '#/definitions/schema.JobStatus' + description: pending, running, completed, failed, cancelled + task_id: + description: Reference to Task + type: string + traces: + description: Execution traces (reasoning, tool calls, tool results) + items: + $ref: '#/definitions/schema.JobTrace' + type: array + triggered_by: + description: '"manual", "cron", "api"' + type: string + videos: + description: List of video URLs or base64 strings + items: + type: string + type: array + webhook_error: + description: Error if webhook failed + type: string + webhook_sent: + description: Webhook delivery tracking + type: 
boolean + webhook_sent_at: + type: string + type: object + schema.JobExecutionRequest: + properties: + audios: + description: List of audio URLs or base64 strings + items: + type: string + type: array + files: + description: List of file URLs or base64 strings + items: + type: string + type: array + images: + description: |- + Multimedia content (optional, for manual execution) + Can contain URLs or base64-encoded data URIs + items: + type: string + type: array + parameters: + additionalProperties: + type: string + description: Optional, for templating + type: object + task_id: + description: Required + type: string + videos: + description: List of video URLs or base64 strings + items: + type: string + type: array + type: object + schema.JobExecutionResponse: + properties: + job_id: + description: unique job identifier + type: string + status: + description: initial status (pending) + type: string + url: + description: URL to poll for job status + type: string + type: object + schema.JobStatus: + enum: + - pending + - running + - completed + - failed + - cancelled + type: string + x-enum-varnames: + - JobStatusPending + - JobStatusRunning + - JobStatusCompleted + - JobStatusFailed + - JobStatusCancelled + schema.JobTrace: + properties: + arguments: + additionalProperties: true + description: Tool arguments or result data + type: object + content: + description: The actual trace content + type: string + timestamp: + description: When this trace occurred + type: string + tool_name: + description: Tool name (for tool_call/tool_result) + type: string + type: + description: '"reasoning", "tool_call", "tool_result", "status"' + type: string + type: object schema.LogprobContent: properties: bytes: @@ -632,6 +864,20 @@ definitions: object: type: string type: object + schema.MultimediaSourceConfig: + properties: + headers: + additionalProperties: + type: string + description: Custom headers for HTTP request (e.g., Authorization) + type: object + type: + description: 
'"image", "video", "audio", "file"' + type: string + url: + description: URL to fetch from + type: string + type: object schema.NodeData: properties: id: @@ -1226,10 +1472,12 @@ definitions: schema.SystemInformationResponse: properties: backends: + description: available backend engines items: type: string type: array loaded_models: + description: currently loaded models items: $ref: '#/definitions/schema.SysInfoModel' type: array @@ -1238,6 +1486,7 @@ definitions: description: TTS request body properties: backend: + description: backend engine override type: string input: description: text input @@ -1260,9 +1509,64 @@ definitions: description: voice audio file or speaker id type: string type: object + schema.Task: + properties: + created_at: + type: string + cron: + description: Optional cron expression + type: string + cron_parameters: + additionalProperties: + type: string + description: Parameters to use when executing cron jobs + type: object + description: + description: Optional description + type: string + enabled: + description: Can be disabled without deletion + type: boolean + id: + description: UUID + type: string + model: + description: Model name (must have MCP config) + type: string + multimedia_sources: + description: |- + Multimedia sources (for cron jobs) + URLs to fetch multimedia content from when cron job executes + Each source can have custom headers for authentication/authorization + items: + $ref: '#/definitions/schema.MultimediaSourceConfig' + type: array + name: + description: User-friendly name + type: string + prompt: + description: Template prompt (supports {{.param}} syntax) + type: string + updated_at: + type: string + webhooks: + description: |- + Webhook configuration (for notifications) + Support multiple webhook endpoints + Webhooks can handle both success and failure cases using template variables: + - {{.Job}} - Job object with all fields + - {{.Task}} - Task object + - {{.Result}} - Job result (if successful) + - {{.Error}} - 
Error message (if failed, empty string if successful) + - {{.Status}} - Job status string + items: + $ref: '#/definitions/schema.WebhookConfig' + type: array + type: object schema.TokenizeRequest: properties: content: + description: text to tokenize type: string model: type: string @@ -1270,6 +1574,7 @@ definitions: schema.TokenizeResponse: properties: tokens: + description: token IDs items: type: integer type: array @@ -1289,7 +1594,7 @@ definitions: description: VAD request body properties: audio: - description: model name or full path + description: raw audio samples as float32 PCM items: type: number type: array @@ -1299,38 +1604,70 @@ definitions: schema.VideoRequest: properties: cfg_scale: + description: classifier-free guidance scale type: number end_image: + description: URL or base64 of the last frame type: string fps: + description: frames per second type: integer height: + description: output height in pixels type: integer input_reference: + description: reference image or video URL type: string model: type: string negative_prompt: + description: things to avoid in the output type: string num_frames: + description: total number of frames to generate type: integer prompt: + description: text description of the video to generate type: string response_format: + description: output format (url or b64_json) type: string seconds: + description: duration in seconds (alternative to num_frames) type: string seed: + description: random seed for reproducibility type: integer size: + description: WxH shorthand (e.g. 
"512x512") type: string start_image: + description: URL or base64 of the first frame type: string step: + description: number of diffusion steps type: integer width: + description: output width in pixels type: integer type: object + schema.WebhookConfig: + properties: + headers: + additionalProperties: + type: string + description: Custom headers (e.g., Authorization) + type: object + method: + description: 'HTTP method (POST, PUT, PATCH) - default: POST' + type: string + payload_template: + description: Optional template for payload + type: string + url: + description: Webhook endpoint URL + type: string + type: object services.GalleryOpStatus: properties: cancellable: @@ -1369,6 +1706,400 @@ info: title: LocalAI API version: 2.0.0 paths: + /api/agent/jobs: + get: + parameters: + - description: Filter by task ID + in: query + name: task_id + type: string + - description: Filter by status (pending, running, completed, failed, cancelled) + in: query + name: status + type: string + - description: Max number of jobs to return + in: query + name: limit + type: integer + - description: Set to 'true' for admin cross-user listing + in: query + name: all_users + type: string + produces: + - application/json + responses: + "200": + description: jobs + schema: + items: + $ref: '#/definitions/schema.Job' + type: array + summary: List agent jobs + tags: + - agent-jobs + /api/agent/jobs/{id}: + delete: + parameters: + - description: Job ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: message + schema: + additionalProperties: + type: string + type: object + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Delete an agent job + tags: + - agent-jobs + get: + parameters: + - description: Job ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: job + schema: + $ref: 
'#/definitions/schema.Job' + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Get an agent job + tags: + - agent-jobs + /api/agent/jobs/{id}/cancel: + post: + parameters: + - description: Job ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: message + schema: + additionalProperties: + type: string + type: object + "400": + description: error + schema: + additionalProperties: + type: string + type: object + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Cancel an agent job + tags: + - agent-jobs + /api/agent/jobs/execute: + post: + consumes: + - application/json + parameters: + - description: Job execution request + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.JobExecutionRequest' + produces: + - application/json + responses: + "201": + description: job created + schema: + $ref: '#/definitions/schema.JobExecutionResponse' + "400": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Execute an agent job + tags: + - agent-jobs + /api/agent/tasks: + get: + parameters: + - description: Set to 'true' for admin cross-user listing + in: query + name: all_users + type: string + produces: + - application/json + responses: + "200": + description: tasks + schema: + items: + $ref: '#/definitions/schema.Task' + type: array + summary: List agent tasks + tags: + - agent-jobs + post: + consumes: + - application/json + parameters: + - description: Task definition + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.Task' + produces: + - application/json + responses: + "201": + description: id + schema: + additionalProperties: + type: string + type: object + "400": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Create a new agent task + tags: 
+ - agent-jobs + /api/agent/tasks/{id}: + delete: + parameters: + - description: Task ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: message + schema: + additionalProperties: + type: string + type: object + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Delete an agent task + tags: + - agent-jobs + get: + parameters: + - description: Task ID + in: path + name: id + required: true + type: string + produces: + - application/json + responses: + "200": + description: task + schema: + $ref: '#/definitions/schema.Task' + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Get an agent task + tags: + - agent-jobs + put: + consumes: + - application/json + parameters: + - description: Task ID + in: path + name: id + required: true + type: string + - description: Updated task definition + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.Task' + produces: + - application/json + responses: + "200": + description: message + schema: + additionalProperties: + type: string + type: object + "400": + description: error + schema: + additionalProperties: + type: string + type: object + "404": + description: error + schema: + additionalProperties: + type: string + type: object + summary: Update an agent task + tags: + - agent-jobs + /api/agent/tasks/{name}/execute: + post: + consumes: + - application/json + parameters: + - description: Task name + in: path + name: name + required: true + type: string + - description: Optional template parameters + in: body + name: parameters + schema: + type: object + produces: + - application/json + responses: + "201": + description: job created + schema: + $ref: '#/definitions/schema.JobExecutionResponse' + "400": + description: error + schema: + additionalProperties: + type: string + type: object + "404": + description: error + 
schema: + additionalProperties: + type: string + type: object + summary: Execute an agent task by name + tags: + - agent-jobs + /api/models/config-json/{name}: + patch: + consumes: + - application/json + description: Deep-merges the JSON patch body into the existing model config + parameters: + - description: Model name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: success message + schema: + additionalProperties: true + type: object + summary: Partially update a model configuration + tags: + - config + /api/models/config-metadata: + get: + description: Returns ~170 config fields with types, UI hints, sections, and + options + produces: + - application/json + responses: + "200": + description: List of field metadata + schema: + items: + $ref: '#/definitions/meta.FieldMeta' + type: array + summary: List all model configuration field metadata + tags: + - config + /api/models/config-metadata/autocomplete/{provider}: + get: + description: Returns runtime-resolved values for dynamic providers (backends, + models) + parameters: + - description: Provider name (backends, models, models:chat, models:tts, models:transcript, + models:vad) + in: path + name: provider + required: true + type: string + produces: + - application/json + responses: + "200": + description: values array + schema: + additionalProperties: true + type: object + summary: Get dynamic autocomplete values for a config field + tags: + - config + /api/models/vram-estimate: + post: + consumes: + - application/json + description: Estimates VRAM based on model weight files, context size, and GPU + layers + parameters: + - description: VRAM estimation parameters + in: body + name: request + required: true + schema: + $ref: '#/definitions/localai.vramEstimateRequest' + produces: + - application/json + responses: + "200": + description: VRAM estimate + schema: + $ref: '#/definitions/localai.vramEstimateResponse' + summary: Estimate VRAM 
usage for a model + tags: + - config /api/p2p: get: responses: @@ -1379,6 +2110,8 @@ paths: $ref: '#/definitions/schema.P2PNodesResponse' type: array summary: Returns available P2P nodes + tags: + - p2p /api/p2p/token: get: responses: @@ -1387,6 +2120,8 @@ paths: schema: type: string summary: Show the P2P token + tags: + - p2p /backend/monitor: get: parameters: @@ -1402,6 +2137,8 @@ paths: schema: $ref: '#/definitions/proto.StatusResponse' summary: Backend monitor endpoint + tags: + - monitoring /backend/shutdown: post: parameters: @@ -1412,7 +2149,9 @@ paths: schema: $ref: '#/definitions/schema.BackendMonitorRequest' responses: {} - summary: Backend monitor endpoint + summary: Backend shutdown endpoint + tags: + - monitoring /backends: get: responses: @@ -1423,6 +2162,8 @@ paths: $ref: '#/definitions/gallery.GalleryBackend' type: array summary: List all Backends + tags: + - backends /backends/apply: post: parameters: @@ -1438,6 +2179,8 @@ paths: schema: $ref: '#/definitions/schema.BackendResponse' summary: Install backends to LocalAI. + tags: + - backends /backends/available: get: responses: @@ -1448,6 +2191,8 @@ paths: $ref: '#/definitions/gallery.GalleryBackend' type: array summary: List all available Backends + tags: + - backends /backends/delete/{name}: post: parameters: @@ -1462,6 +2207,8 @@ paths: schema: $ref: '#/definitions/schema.BackendResponse' summary: delete backends from LocalAI. 
+ tags: + - backends /backends/galleries: get: responses: @@ -1472,6 +2219,8 @@ paths: $ref: '#/definitions/config.Gallery' type: array summary: List all Galleries + tags: + - backends /backends/jobs: get: responses: @@ -1482,6 +2231,8 @@ paths: $ref: '#/definitions/services.GalleryOpStatus' type: object summary: Returns all the jobs status progress + tags: + - backends /backends/jobs/{uuid}: get: responses: @@ -1490,17 +2241,20 @@ paths: schema: $ref: '#/definitions/services.GalleryOpStatus' summary: Returns the job status + tags: + - backends /metrics: get: - parameters: - - description: Gallery details - in: body - name: request - required: true - schema: - $ref: '#/definitions/config.Gallery' - responses: {} + produces: + - text/plain + responses: + "200": + description: Prometheus metrics + schema: + type: string summary: Prometheus metrics endpoint + tags: + - monitoring /models/apply: post: parameters: @@ -1516,6 +2270,8 @@ paths: schema: $ref: '#/definitions/schema.GalleryResponse' summary: Install models to LocalAI. + tags: + - models /models/available: get: responses: @@ -1523,9 +2279,11 @@ paths: description: Response schema: items: - $ref: '#/definitions/gallery.GalleryModel' + $ref: '#/definitions/gallery.Metadata' type: array summary: List installable models. + tags: + - models /models/delete/{name}: post: parameters: @@ -1540,6 +2298,8 @@ paths: schema: $ref: '#/definitions/schema.GalleryResponse' summary: delete models to LocalAI. 
+ tags: + - models /models/galleries: get: responses: @@ -1550,6 +2310,8 @@ paths: $ref: '#/definitions/config.Gallery' type: array summary: List all Galleries + tags: + - models /models/jobs: get: responses: @@ -1560,6 +2322,8 @@ paths: $ref: '#/definitions/services.GalleryOpStatus' type: object summary: Returns all the jobs status progress + tags: + - models /models/jobs/{uuid}: get: responses: @@ -1568,6 +2332,8 @@ paths: schema: $ref: '#/definitions/services.GalleryOpStatus' summary: Returns the job status + tags: + - models /system: get: responses: @@ -1576,6 +2342,8 @@ paths: schema: $ref: '#/definitions/schema.SystemInformationResponse' summary: Show the LocalAI instance information + tags: + - monitoring /tokenMetrics: get: consumes: @@ -1588,6 +2356,8 @@ paths: schema: type: string summary: Get TokenMetrics for Active Slot. + tags: + - tokenize /tts: post: consumes: @@ -1607,6 +2377,8 @@ paths: schema: type: string summary: Generates audio from the input text. + tags: + - audio /v1/audio/speech: post: consumes: @@ -1626,6 +2398,8 @@ paths: schema: type: string summary: Generates audio from the input text. + tags: + - audio /v1/audio/transcriptions: post: consumes: @@ -1649,6 +2423,8 @@ paths: type: string type: object summary: Transcribes audio into the input language. + tags: + - audio /v1/chat/completions: post: parameters: @@ -1664,6 +2440,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: Generate a chat completions for a given prompt and model. + tags: + - inference /v1/completions: post: parameters: @@ -1679,6 +2457,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: Generate completions for a given prompt and model. + tags: + - inference /v1/detection: post: parameters: @@ -1694,6 +2474,8 @@ paths: schema: $ref: '#/definitions/schema.DetectionResponse' summary: Detects objects in the input image. 
+ tags: + - detection /v1/edits: post: parameters: @@ -1709,6 +2491,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: OpenAI edit endpoint + tags: + - inference /v1/embeddings: post: parameters: @@ -1725,6 +2509,8 @@ paths: $ref: '#/definitions/schema.OpenAIResponse' summary: Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. + tags: + - embeddings /v1/images/generations: post: parameters: @@ -1740,6 +2526,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: Creates an image given a prompt. + tags: + - images /v1/images/inpainting: post: consumes: @@ -1808,6 +2596,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: MCP chat completions with automatic tool execution + tags: + - mcp /v1/messages: post: parameters: @@ -1823,6 +2613,8 @@ paths: schema: $ref: '#/definitions/schema.AnthropicResponse' summary: Generate a message response for the given messages and model. + tags: + - inference /v1/models: get: responses: @@ -1831,6 +2623,8 @@ paths: schema: $ref: '#/definitions/schema.ModelsDataResponse' summary: List and describe the various models available in the API. + tags: + - models /v1/rerank: post: parameters: @@ -1846,6 +2640,8 @@ paths: schema: $ref: '#/definitions/schema.JINARerankResponse' summary: Reranks a list of phrases by relevance to a given text query. + tags: + - rerank /v1/responses: post: parameters: @@ -1861,6 +2657,8 @@ paths: schema: $ref: '#/definitions/schema.ORResponseResource' summary: Create a response using the Open Responses API + tags: + - inference /v1/responses/{id}: get: description: Retrieve a response by ID. 
Can be used for polling background responses @@ -1895,6 +2693,8 @@ paths: additionalProperties: true type: object summary: Get a response by ID + tags: + - inference /v1/responses/{id}/cancel: post: description: Cancel a background response if it's still in progress @@ -1920,6 +2720,8 @@ paths: additionalProperties: true type: object summary: Cancel a response + tags: + - inference /v1/sound-generation: post: parameters: @@ -1935,6 +2737,8 @@ paths: schema: type: string summary: Generates audio from the input text. + tags: + - audio /v1/text-to-speech/{voice-id}: post: parameters: @@ -1955,6 +2759,8 @@ paths: schema: type: string summary: Generates audio from the input text. + tags: + - audio /v1/tokenMetrics: get: consumes: @@ -1967,6 +2773,8 @@ paths: schema: type: string summary: Get TokenMetrics for Active Slot. + tags: + - tokenize /v1/tokenize: post: parameters: @@ -1982,6 +2790,8 @@ paths: schema: $ref: '#/definitions/schema.TokenizeResponse' summary: Tokenize the input. + tags: + - tokenize /vad: post: consumes: @@ -1999,6 +2809,8 @@ paths: schema: $ref: '#/definitions/proto.VADResponse' summary: Detect voice fragments in an audio stream + tags: + - audio /video: post: parameters: @@ -2014,6 +2826,8 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: Creates a video given a prompt. + tags: + - video securityDefinitions: BearerAuth: in: header