CerebrasResearch · michaelw-cerebras · Oct 10, 2025 · Oct 10, 2025 · Oct 16, 2025 · Oct 21, 2025
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -12,8 +12,20 @@
 		"ghcr.io/devcontainers/features/node:1": {},
 	},
 	"postCreateCommand": ".devcontainer/setup.sh",
-	"runArgs": ["--add-host=host.docker.internal:host-gateway"],
+	"runArgs": ["--add-host=host.docker.internal:host-gateway", "--network=host"],
 	"containerEnv": {
 		"DOCKER_HOST_ADDR": "host.docker.internal"
 	},
+	"mounts": [
+		"source=/mlf11-shared/swebench_dockers_for_eval,target=/workspaces/Openhands/swebench_dockers_for_eval,type=bind,consistency=cached",  // Bind mount of a host directory. The target deliberately says "Openhands" (not "OpenHands") to work around file-mounting issues.
+		"source=/mlf14-shared/michaelw/runtime_dockers,target=/workspaces/Openhands/runtime_dockers,type=bind,consistency=cached"  // Bind mount of a host directory. The target deliberately says "Openhands" (not "OpenHands") to work around file-mounting issues.
+	],
+	"customizations": {
+        "vscode": {
+            "extensions": [
+                "ms-python.python@2024.2.1", // Pinned using the "<extension-id>@<version>" form
+                "ms-python.vscode-pylance"
+            ]
+        }
+    }
 }
diff --git a/.openhands/setup.sh b/.openhands/setup.sh
@@ -11,3 +11,4 @@ if [ -d ".git" ]; then
     pre-commit install
     make install-pre-commit-hooks
 fi
+sudo apt-get update && sudo apt-get install -y tmux  # Refresh the package index first, then install tmux non-interactively (-y).
diff --git a/clean_docker.sh b/clean_docker.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Remove ghcr.io/all-hands-ai/runtime images that no container was created
+# from, then prune Docker's unused data.
+#
+# WARNING: the final `docker system prune -a -f --volumes` is host-wide. It is
+# not limited to the runtime images handled by the loop below, so run this
+# only on machines where all unused Docker data may be discarded.
+
+docker images 'ghcr.io/all-hands-ai/runtime' -q | sort -u | \
+while read -r id; do
+  # Only touch images with no container (running or stopped) based on them.
+  if [ -z "$(docker ps -aq --filter ancestor="$id")" ]; then
+    tags=$(docker image inspect "$id" --format '{{join .RepoTags " "}}' 2>/dev/null || true)
+    # $tags is a space-separated list, so it is deliberately left unquoted.
+    # shellcheck disable=SC2086
+    [ -n "$tags" ] && docker rmi $tags
+    docker rmi "$id" 2>/dev/null || true
+  fi
+done
+# Prune dangling images, the build cache, and unused volumes.
+docker image prune -f; docker builder prune -f; docker volume prune -f;
+
+docker system prune -a -f --volumes;
diff --git a/config.toml b/config.toml
@@ -0,0 +1,49 @@
+[core]
+workspace_base = "/workspaces"
+search_api_key = ""  # Secret removed from version control (rotate the leaked Tavily key). Supply the Step 8 key via the SEARCH_API_KEY environment variable instead.
+save_trajectory_path = "/workspaces/OpenHands/trajectories.json"
+run_as_openhands = false
+
+
+[llm.gptoss-120b]
+model = "openai/gpt-oss-120b"  # Format is openai/<model_name>; <model_name> comes from the LiteLLM config settings.
+api_key = "serving-on-vllm"
+base_url = "http://0.0.0.0:14129/"  # 14129 is the LiteLLM port obtained in Step 5.
+max_input_tokens = 65384
+max_output_tokens = 16384
+max_message_chars = 50000
+
+[llm.qwen_coder_30b_small]
+model = "openai/qwen-coder-30b-small"  # The part after openai/ is the model_name from the LiteLLM config settings.
+api_key = "serving-on-vllm"
+base_url = "http://0.0.0.0:4000/"
+max_input_tokens = 65384
+max_output_tokens = 16384
+max_message_chars = 50000
+native_tool_calling = true
+
+[llm.cepov3_optillm_qwen30b]
+model = "openai/Qwen/Qwen3-Coder-30B-A3B-Instruct"
+base_url = "http://localhost:10010/v1"
+max_input_tokens = 65384
+max_output_tokens = 16384
+max_message_chars = 50000
+native_tool_calling = true
+log_completions = true
+
+
+[agent]
+enable_history_truncation = true
+llm_config = "qwen_coder_30b_small"  # Name of the [llm.qwen_coder_30b_small] table, written without the "llm." prefix (same form the condenser section uses).
+
+[sandbox]
+timeout = 120  # NOTE(review): presumably seconds — confirm against the OpenHands sandbox configuration docs.
+
+
+[condenser.summarizer_for_eval_gptoss-120b]
+type = "llm"
+llm_config = "qwen_coder_30b_small"  # LLM config used for summarization. NOTE(review): the section name says gptoss-120b but this selects the qwen config — confirm that is intended.
+keep_first = 2        # Number of initial events that are always kept
+max_size = 100        # Maximum history size before summarization is triggered
+
+
diff --git a/config_llm.yaml b/config_llm.yaml
@@ -0,0 +1,11 @@
+model_list:
+  - model_name: qwen-coder-30b-small  # Alias that config.toml refers to as "openai/qwen-coder-30b-small"
+    litellm_params:
+      model: hosted_vllm/Qwen/Qwen3-Coder-30B-A3B-Instruct
+      api_base: http://localhost:10000/v1/  # Endpoint used for this alias
+      api_key: "serving-on-vllm"  # Presumably a placeholder rather than a real secret — TODO confirm
+  # - model_name: qwen-coder-30b-small  # NOTE(review): this commented-out entry duplicates the active one above
+  #   litellm_params:
+  #     model: hosted_vllm/Qwen/Qwen3-Coder-30B-A3B-Instruct
+  #     api_base: http://localhost:10000/v1/
+  #     api_key: "serving-on-vllm"
diff --git a/evaluation/benchmarks/swe_bench/config_2.toml b/evaluation/benchmarks/swe_bench/config_2.toml
@@ -0,0 +1,52 @@
+selected_ids = [  # 50 unique instance ids, each written as "<owner>__<repo>-<number>"
+    "pvlib__pvlib-python-1666",
+    "pvlib__pvlib-python-1478",
+    "pylint-dev__astroid-2015",
+    "pyvista__pyvista-432",
+    "pyvista__pyvista-3675",
+    "sqlfluff__sqlfluff-880",
+    "sqlfluff__sqlfluff-891",
+    "marshmallow-code__marshmallow-2123",
+    "pvlib__pvlib-python-1623",
+    "pvlib__pvlib-python-1176",
+    "pvlib__pvlib-python-1469",
+    "pvlib__pvlib-python-1033",
+    "pydicom__pydicom-916",
+    "pydicom__pydicom-1236",
+    "pvlib__pvlib-python-718",
+    "pvlib__pvlib-python-1053",
+    "pvlib__pvlib-python-1518",
+    "pylint-dev__astroid-1741",
+    "pylint-dev__astroid-1030",
+    "pylint-dev__astroid-2219",
+    "pyvista__pyvista-4226",
+    "pyvista__pyvista-3750",
+    "pyvista__pyvista-4225",
+    "pydicom__pydicom-1241",
+    "pydicom__pydicom-1017",
+    "pydicom__pydicom-1048",
+    "pydicom__pydicom-1720",
+    "pydicom__pydicom-1334",
+    "pydicom__pydicom-997",
+    "marshmallow-code__marshmallow-1229",
+    "pvlib__pvlib-python-823",
+    "pvlib__pvlib-python-1186",
+    "pvlib__pvlib-python-1764",
+    "pvlib__pvlib-python-1682",
+    "pvlib__pvlib-python-1832",
+    "pvlib__pvlib-python-1089",
+    "pylint-dev__astroid-2240",
+    "pylint-dev__astroid-1417",
+    "pylint-dev__astroid-984",
+    "pylint-dev__astroid-1092",
+    "pylint-dev__astroid-983",
+    "pydicom__pydicom-995",
+    "pydicom__pydicom-897",
+    "pydicom__pydicom-1674",
+    "pydicom__pydicom-1375",
+    "pvlib__pvlib-python-807",
+    "pvlib__pvlib-python-1048",
+    "pvlib__pvlib-python-1181",
+    "pvlib__pvlib-python-1480",
+    "pvlib__pvlib-python-763",
+]