chore: update model benchmark scores and metadata

ericyangpan · claude · ericyangpan · commit 9bc91b0d2415 · 2025-12-13T10:45:48.000+08:00
- Add benchmark scores to 33 model manifests
- SWE-bench, TerminalBench, SciCode, LiveCodeBench scores
- MMMU and WebDevArena scores (where available)
- Update GitHub stars data
- Regenerate metadata files from updated manifests

Models updated with benchmarks:
- Anthropic Claude (4, 4.5, Haiku, Opus, Sonnet)
- OpenAI GPT (4o, 4.1, 5, 5.1, Codex variants)
- Google Gemini (2.5 Flash, 2.5 Pro, 3 Pro)
- DeepSeek (R1, V3 Terminus)
- Alibaba Qwen3 Coder (30B, 480B, Plus)
- Z.ai GLM-4.6
- Meta Llama 4 Maverick
- Moonshot Kimi K2
- xAI Grok Code Fast 1
- MiniMax M2

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
diff --git a/data/github-stars.json b/data/github-stars.json
@@ -2,36 +2,37 @@
   "extensions": {
     "amp": null,
     "augment-code": null,
-    "claude-code": 44,
-    "cline": 53.8,
-    "codex": 51.5,
-    "continue": 30.1,
+    "claude-code": 45,
+    "cline": 55.7,
+    "codex": 52.1,
+    "continue": 30.2,
     "droid": null,
+    "gemini-code-assist": null,
     "github-copilot": null,
     "jetbrains-junie": null,
-    "kilo-code": 12.7,
+    "kilo-code": 13,
     "qoder": null,
-    "roo-code": 20.9,
+    "roo-code": 21.1,
     "tabnine": 10.8
   },
   "clis": {
     "amazon-q-developer-cli": 1.8,
     "amp-cli": null,
     "augment-code-cli": 0.1,
-    "claude-code-cli": 44,
+    "claude-code-cli": 45,
     "cline-cli": null,
     "codebuddy-cli": null,
-    "codex-cli": 51.5,
-    "continue-cli": 30.1,
+    "codex-cli": 52.1,
+    "continue-cli": 30.2,
     "droid-cli": null,
-    "gemini-cli": 84.6,
-    "github-copilot-cli": 5.5,
-    "kilo-code-cli": 12.7,
-    "kimi-cli": 3.3,
-    "kiro-cli": null,
-    "kode": 3.6,
-    "neovate-code": 1.1,
-    "opencode": 34.5,
+    "gemini-cli": 86.3,
+    "github-copilot-cli": 5.8,
+    "kilo-code-cli": 13,
+    "kimi-cli": 3.5,
+    "kiro-cli": 2.4,
+    "kode": 3.7,
+    "neovate-code": 1.2,
+    "opencode": 37.1,
     "qoder-cli": null
   },
   "ides": {
@@ -40,11 +41,15 @@
     "codeflicker": null,
     "cursor": 31.8,
     "intellij-idea": 19.2,
-    "kiro": 2.3,
+    "kiro": 2.4,
     "qoder": null,
     "trae": null,
-    "vscode": 179.1,
+    "vscode": 179.4,
     "windsurf": null,
-    "zed": 70.6
+    "zed": 71.1
+  },
+  "models": {
+    "composer": null,
+    "glm-4-6v": null
   }
 }
diff --git a/manifests/models/claude-haiku-4-5.json b/manifests/models/claude-haiku-4-5.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/claude-4-5-haiku",
     "openrouter": "https://openrouter.ai/anthropic/claude-haiku-4.5"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.298,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/claude-opus-4-1.json b/manifests/models/claude-opus-4-1.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/claude-4-1-opus",
     "openrouter": "https://openrouter.ai/anthropic/claude-opus-4.1"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.38,
+    "sciCode": null,
+    "liveCodeBench": 46.9
   }
 }
diff --git a/manifests/models/claude-opus-4.json b/manifests/models/claude-opus-4.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/claude-4-opus",
     "openrouter": "https://openrouter.ai/anthropic/claude-opus-4"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.578,
+    "sciCode": null,
+    "liveCodeBench": 56.6
   }
 }
diff --git a/manifests/models/claude-sonnet-4-5.json b/manifests/models/claude-sonnet-4-5.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/claude-4-5-sonnet",
     "openrouter": "https://openrouter.ai/anthropic/claude-sonnet-4.5"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.428,
+    "sciCode": null,
+    "liveCodeBench": 47.1
   }
 }
diff --git a/manifests/models/claude-sonnet-4.json b/manifests/models/claude-sonnet-4.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/claude-4-sonnet",
     "openrouter": "https://openrouter.ai/anthropic/claude-sonnet-4"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.428,
+    "sciCode": null,
+    "liveCodeBench": 55.9
   }
 }
diff --git a/manifests/models/deepseek-r1.json b/manifests/models/deepseek-r1.json
@@ -24,5 +24,11 @@
     "huggingface": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
     "artificialAnalysis": "https://artificialanalysis.ai/models/deepseek-r1",
     "openrouter": "https://openrouter.ai/deepseek/deepseek-r1"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": null,
+    "sciCode": 4.6,
+    "liveCodeBench": 73.1
   }
 }
diff --git a/manifests/models/deepseek-v3-terminus.json b/manifests/models/deepseek-v3-terminus.json
@@ -24,5 +24,11 @@
     "huggingface": "https://huggingface.co/deepseek-ai/DeepSeek-V3.1-Terminus",
     "artificialAnalysis": "https://artificialanalysis.ai/models/deepseek-v3-1-terminus",
     "openrouter": "https://openrouter.ai/deepseek/deepseek-v3.1-terminus"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": null,
+    "sciCode": 3.1,
+    "liveCodeBench": 27.2
   }
 }
diff --git a/manifests/models/gemini-2-5-flash.json b/manifests/models/gemini-2-5-flash.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/gemini-2-5-flash",
     "openrouter": "https://openrouter.ai/google/gemini-2.5-flash"
+  },
+  "benchmarks": {
+    "sweBench": 28.73,
+    "terminalBench": 0.171,
+    "sciCode": null,
+    "liveCodeBench": 61.9
   }
 }
diff --git a/manifests/models/gemini-2-5-pro.json b/manifests/models/gemini-2-5-pro.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/gemini-2-5-pro",
     "openrouter": "https://openrouter.ai/google/gemini-2.5-pro"
+  },
+  "benchmarks": {
+    "sweBench": 53.6,
+    "terminalBench": 0.326,
+    "sciCode": null,
+    "liveCodeBench": 73.6
   }
 }
diff --git a/manifests/models/gemini-3-pro.json b/manifests/models/gemini-3-pro.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/gemini-3-pro",
     "openrouter": "https://openrouter.ai/google/gemini-3-pro"
+  },
+  "benchmarks": {
+    "sweBench": 74.2,
+    "terminalBench": 0.589,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/glm-4-6.json b/manifests/models/glm-4-6.json
@@ -24,5 +24,11 @@
     "huggingface": "https://huggingface.co/zai-org/GLM-4.6",
     "artificialAnalysis": "https://artificialanalysis.ai/models/glm-4-6",
     "openrouter": "https://openrouter.ai/z-ai/glm-4.6"
+  },
+  "benchmarks": {
+    "sweBench": 55.4,
+    "terminalBench": 0.245,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/gpt-4-1.json b/manifests/models/gpt-4-1.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/gpt-4-1",
     "openrouter": "https://openrouter.ai/openai/gpt-4.1"
+  },
+  "benchmarks": {
+    "sweBench": 39.58,
+    "terminalBench": null,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/gpt-4o.json b/manifests/models/gpt-4o.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/gpt-4o",
     "openrouter": "https://openrouter.ai/openai/gpt-4o"
+  },
+  "benchmarks": {
+    "sweBench": 21.62,
+    "terminalBench": null,
+    "sciCode": 1.5,
+    "liveCodeBench": 29.5
   }
 }
diff --git a/manifests/models/gpt-5-1-codex.json b/manifests/models/gpt-5-1-codex.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": null,
     "openrouter": "https://openrouter.ai/openai/gpt-5.1-codex"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.604,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/gpt-5-1.json b/manifests/models/gpt-5-1.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/gpt-5-1",
     "openrouter": "https://openrouter.ai/openai/gpt-5.1"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.47600000000000003,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/gpt-5-codex.json b/manifests/models/gpt-5-codex.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/gpt-5-codex",
     "openrouter": "https://openrouter.ai/openai/gpt-5-codex"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.496,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/gpt-5.json b/manifests/models/gpt-5.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/gpt-5",
     "openrouter": "https://openrouter.ai/openai/gpt-5"
+  },
+  "benchmarks": {
+    "sweBench": 65,
+    "terminalBench": 0.496,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/grok-code-fast-1.json b/manifests/models/grok-code-fast-1.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": "https://artificialanalysis.ai/models/grok-code-fast-1",
     "openrouter": "https://openrouter.ai/x-ai/grok-code-fast-1"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.258,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/kimi-k2-0905.json b/manifests/models/kimi-k2-0905.json
@@ -24,5 +24,11 @@
     "huggingface": "https://huggingface.co/moonshotai/Kimi-K2-Instruct-0905",
     "artificialAnalysis": "https://artificialanalysis.ai/models/kimi-k2-0905",
     "openrouter": "https://openrouter.ai/moonshotai/kimi-k2-0905"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": null,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/llama-4-maverick.json b/manifests/models/llama-4-maverick.json
@@ -24,5 +24,11 @@
     "huggingface": "https://huggingface.co/meta-llama/Llama-4-Maverick",
     "artificialAnalysis": "https://artificialanalysis.ai/models/llama-4-maverick",
     "openrouter": "https://openrouter.ai/meta/llama-4-maverick"
+  },
+  "benchmarks": {
+    "sweBench": 21.04,
+    "terminalBench": null,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/minimax-m2.json b/manifests/models/minimax-m2.json
@@ -24,5 +24,11 @@
     "huggingface": null,
     "artificialAnalysis": null,
     "openrouter": null
+  },
+  "benchmarks": {
+    "sweBench": 61,
+    "terminalBench": 0.3,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/qwen3-coder-30b-a3b.json b/manifests/models/qwen3-coder-30b-a3b.json
@@ -24,5 +24,11 @@
     "huggingface": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct",
     "artificialAnalysis": "https://artificialanalysis.ai/models/qwen3-coder-30b-a3b-instruct",
     "openrouter": "https://openrouter.ai/qwen/qwen3-coder-30b-a3b-instruct"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": null,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/qwen3-coder-480b-a35b.json b/manifests/models/qwen3-coder-480b-a35b.json
@@ -24,5 +24,11 @@
     "huggingface": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
     "artificialAnalysis": "https://artificialanalysis.ai/models/qwen3-coder-480b-a35b-instruct",
     "openrouter": "https://openrouter.ai/qwen/qwen3-coder"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": 0.254,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/manifests/models/qwen3-coder-plus.json b/manifests/models/qwen3-coder-plus.json
@@ -24,5 +24,11 @@
     "huggingface": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B",
     "artificialAnalysis": null,
     "openrouter": "https://openrouter.ai/qwen/qwen3-coder-plus"
+  },
+  "benchmarks": {
+    "sweBench": null,
+    "terminalBench": null,
+    "sciCode": null,
+    "liveCodeBench": null
   }
 }
diff --git a/src/lib/generated/extensions.ts b/src/lib/generated/extensions.ts
@@ -11,6 +11,7 @@ import Cline from '../../../manifests/extensions/cline.json'
 import Codex from '../../../manifests/extensions/codex.json'
 import Continue from '../../../manifests/extensions/continue.json'
 import Droid from '../../../manifests/extensions/droid.json'
+import GeminiCodeAssist from '../../../manifests/extensions/gemini-code-assist.json'
 import GithubCopilot from '../../../manifests/extensions/github-copilot.json'
 import JetbrainsJunie from '../../../manifests/extensions/jetbrains-junie.json'
 import KiloCode from '../../../manifests/extensions/kilo-code.json'
@@ -27,6 +28,7 @@ export const extensionsData = [
   Codex,
   Continue,
   Droid,
+  GeminiCodeAssist,
   GithubCopilot,
   JetbrainsJunie,
   KiloCode,
diff --git a/src/lib/generated/metadata.ts b/src/lib/generated/metadata.ts
@@ -601,8 +601,8 @@ export const faqMetadata: Record<string, FaqItem[]> = {
 export const stackCounts: Record<string, number> = {
   ides: 11,
   clis: 18,
-  extensions: 13,
-  models: 24,
+  extensions: 14,
+  models: 26,
   'model-providers': 7,
   vendors: 34,
 }
diff --git a/src/lib/generated/models.ts b/src/lib/generated/models.ts
diff --git a/src/lib/generated/vendors.ts b/src/lib/generated/vendors.ts

Original file line number	Diff line number	Diff line change
`@@ -24,5 +24,11 @@`
`24`	`24`	`"huggingface": null,`
`25`	`25`	`"artificialAnalysis": "https://artificialanalysis.ai/models/claude-4-5-haiku",`
`26`	`26`	`"openrouter": "https://openrouter.ai/anthropic/claude-haiku-4.5"`
	`27`	`+ },`
	`28`	`+ "benchmarks": {`
	`29`	`+ "sweBench": null,`
	`30`	`+ "terminalBench": 0.298,`
	`31`	`+ "sciCode": null,`
	`32`	`+ "liveCodeBench": null`
`27`	`33`	`}`
`28`	`34`	`}`