Commit 5041ad3
refactor: trim to 275 canonical tasks, archive non-canonical to backups/
- Remove 5 protonmail/webclients SWE-bench Pro tasks (can't run on Daytona, score 0.0)
- Move 156 non-canonical tasks from suite dirs to benchmarks/backups/
- Update unified_benchmark_manifest.json: 280 → 275 tasks
- Update README.md and benchmarks/README.md with correct suite counts (131 SDLC + 144 Org)
- Clean up related configs (ground_truth_files, mirror_creation_manifest, etc.)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 204b408 commit 5041ad3
File tree
3,021 files changed
+37862
-4107
lines changed
- benchmarks
- backups
- csb_org_compliance
- ccx-compliance-051
- environment
- tests
- ccx-compliance-115
- environment
- tests
- ccx-compliance-118
- environment
- tests
- ccx-compliance-185
- environment
- tests
- ccx-compliance-186
- environment
- tests
- ccx-compliance-193
- environment
- tests
- ccx-compliance-194
- environment
- tests
- csb_org_crossorg
- ccx-crossorg-208
- environment
- tests
- ccx-crossorg-209
- environment
- tests
- ccx-crossorg-211
- environment
- tests
- ccx-crossorg-213
- environment
- tests
- ccx-crossorg-214
- environment
- tests
- ccx-crossorg-216
- environment
- tests
- csb_org_crossrepo_tracing
- ccx-config-trace-003
- environment
- tests
- ccx-dep-trace-002
- environment
- tests
- ccx-dep-trace-004
- environment
- tests
- ccx-dep-trace-174
- environment
- tests
- ccx-dep-trace-175
- environment
- tests
- ccx-dep-trace-176
- environment
- tests
- ccx-dep-trace-177
- environment
- tests
- ccx-dep-trace-178
- environment
- tests
- ccx-dep-trace-179
- environment
- tests
- ccx-dep-trace-180
- environment
- tests
- ccx-dep-trace-181
- environment
- tests
- ccx-dep-trace-272
- environment
- tests
- csb_org_crossrepo
- ccx-dep-trace-261
- environment
- tests
- ccx-dep-trace-262
- environment
- tests
- ccx-dep-trace-263
- environment
- tests
- csb_org_domain
- ccx-domain-071
- environment
- tests
- ccx-domain-072
- environment
- tests
- ccx-domain-074
- environment
- tests
- ccx-domain-120
- environment
- tests
- ccx-domain-151
- environment
- tests
- ccx-domain-152
- environment
- tests
- ccx-domain-153
- environment
- tests
- ccx-domain-154
- environment
- tests
- ccx-domain-158
- environment
- tests
- csb_org_incident
- ccx-incident-031
- environment
- tests
- ccx-incident-033
- environment
- tests
- ccx-incident-142
- environment
- tests
- ccx-incident-143
- environment
- tests
- ccx-incident-146
- environment
- tests
- ccx-incident-147
- environment
- tests
- ccx-incident-150
- environment
- tests
- ccx-incident-297
- environment
- tests
- ccx-incident-298
- environment
- tests
- ccx-incident-299
- environment
- tests
- ccx-incident-300
- environment
- tests
- ccx-incident-301
- environment
- tests
- ccx-incident-302
- environment
- tests
- ccx-incident-303
- environment
- tests
- ccx-incident-304
- environment
- tests
- ccx-incident-305
- environment
- tests
- ccx-incident-306
- environment
- tests
- ccx-incident-307
- environment
- tests
- ccx-incident-308
- environment
- tests
- ccx-incident-309
- environment
- tests
- ccx-incident-310
- environment
- tests
- ccx-incident-311
- environment
- tests
- ccx-incident-312
- environment
- tests
- csb_org_migration
- ccx-migration-022
- environment
- tests
- ccx-migration-025
- environment
- tests
- ccx-migration-027
- environment
- tests
- csb_org_onboarding
- ccx-explore-042-ds
- environment
- tests
- ccx-onboard-041
- environment
- tests
- ccx-onboard-042
- environment
- tests
- ccx-onboard-043
- environment
- tests
- ccx-onboard-044
- environment
- tests
- ccx-onboard-050-ds
- environment
- tests
- ccx-onboard-050
- environment
- tests
- ccx-onboard-128
- environment
- tests
- ccx-onboard-136
- environment
- tests
- ccx-onboard-138
- environment
- tests
- ccx-onboard-280
- environment
- tests
- ccx-onboard-search-201
- environment
- tests
- ccx-onboard-search-202
- environment
- tests
- ccx-onboard-search-203
- environment
- tests
- ccx-onboard-search-204
- environment
- tests
- ccx-onboard-search-205
- environment
- tests
- ccx-onboard-search-206
- environment
- tests
- csb_org_org
- ccx-agentic-233
- environment
- tests
- ccx-agentic-234
- environment
- tests
- ccx-agentic-235
- environment
- tests
- ccx-agentic-236
- environment
- tests
- ccx-agentic-237
- environment
- tests
- csb_org_platform
- ccx-platform-119
- environment
- tests
- ccx-platform-244
- environment
- tests
- ccx-platform-245
- environment
- tests
- ccx-platform-246
- environment
- tests
- ccx-platform-250
- environment
- tests
- ccx-platform-251
- environment
- tests
- ccx-platform-285
- environment
- tests
- csb_org_security
- ccx-vuln-remed-011
- environment
- tests
- ccx-vuln-remed-012
- environment
- tests
- ccx-vuln-remed-013
- environment
- tests
- ccx-vuln-remed-163
- environment
- tests
- ccx-vuln-remed-164
- environment
- tests
- ccx-vuln-remed-165
- environment
- tests
- ccx-vuln-remed-166
- environment
- tests
- ccx-vuln-remed-168
- environment
- tests
- ccx-vuln-remed-170
- environment
- tests
- ccx-vuln-remed-283
- environment
- tests
- ccx-vuln-remed-284
- environment
- tests
- ccx-vuln-remed-287
- environment
- tests
- ccx-vuln-remed-296
- environment
- tests
- ccx-vuln-remed-313
- environment
- tests
- ccx-vuln-remed-314
- environment
- tests
- ccx-vuln-remed-315
- environment
- tests
- ccx-vuln-remed-316
- environment
- tests
- ccx-vuln-remed-317
- environment
- tests
- ccx-vuln-remed-318
- environment
- tests
- ccx-vuln-remed-319
- environment
- tests
- ccx-vuln-remed-320
- environment
- tests
- ccx-vuln-remed-321
- environment
- tests
- ccx-vuln-remed-322
- environment
- tests
- ccx-vuln-remed-323
- environment
- tests
- ccx-vuln-remed-324
- environment
- tests
- ccx-vuln-remed-325
- environment
- tests
- ccx-vuln-remed-326
- environment
- tests
- ccx-vuln-remed-327
- environment
- tests
- ccx-vuln-remed-328
- environment
- tests
- csb_sdlc_debug
- envoy-duplicate-headers-debug-001
- environment
- tests
- grafana-table-panel-regression-001
- environment
- tests
- istio-xds-destrul-debug-001
- environment
- tests
- prometheus-queue-reshard-debug-001
- environment
- tests
- terraform-phantom-update-debug-001
- environment
- tests
- tutanota-search-regression-prove-001
- environment
- tests
- csb_sdlc_design
- envoy-routeconfig-dep-chain-001
- environment
- tests
- envoy-stream-aggregated-sym-001
- environment
- tests
- flipt-transitive-deps-001
- environment
- tests
- eval_scripts
- k8s-typemeta-dep-chain-001
- environment
- tests
- csb_sdlc_document
- docgen-changelog-002
- environment
- tests
- docgen-runbook-001
- environment
- tests
- istio-arch-doc-gen-001
- environment
- tests
- terraform-arch-doc-gen-001
- environment
- tests
- csb_sdlc_fix
- ansible-module-respawn-fix-001
- environment
- solution
- tests
- flipt-cockroachdb-backend-fix-001
- environment
- solution
- tests
- flipt-ecr-auth-oci-fix-001
- environment
- solution
- tests
- flipt-eval-latency-fix-001
- environment
- tests
- flipt-otlp-exporter-fix-001
- environment
- solution
- tests
- flipt-trace-sampling-fix-001
- environment
- solution
- tests
- openlibrary-fntocli-adapter-fix-001
- environment
- solution
- tests
- openlibrary-search-query-fix-001
- environment
- solution
- tests
- openlibrary-solr-boolean-fix-001
- environment
- solution
- tests
- csb_sdlc_secure
- curl-vuln-reachability-001
- environment
- tests
- expected_patches
- grpcurl-transitive-vuln-001
- environment
- tests
- expected_patches
- csb_sdlc_test
- cockroach-kv-txn-test-001
- environment
- tests
- curl-security-review-001
- environment
- tests
- expected_patches
- numpy-array-sum-perf-001
- environment
- tests
- openhands-search-file-test-001
- environment
- tests
- pandas-groupby-perf-001
- environment
- tests
- sklearn-kmeans-perf-001
- environment
- tests
- test-integration-001
- environment
- tests
- test-integration-002
- environment
- tests
- csb_sdlc_understand/terraform-plan-pipeline-qa-001
- environment
- tests
- csb_org_compliance
- ccx-compliance-052/tests
- ccx-compliance-053/tests
- ccx-compliance-124/tests
- ccx-compliance-182
- tests
- ccx-compliance-183
- tests
- ccx-compliance-184
- tests
- ccx-compliance-187
- tests
- ccx-compliance-189
- tests
- ccx-compliance-190
- tests
- ccx-compliance-191
- tests
- ccx-compliance-192
- tests
- csb_org_crossorg
- ccx-crossorg-062/tests
- ccx-crossorg-121/tests
- ccx-crossorg-132/tests
- ccx-crossorg-217
- tests
- ccx-crossorg-218/tests
- ccx-crossorg-219/tests
- ccx-crossorg-220
- tests
- ccx-crossorg-221
- tests
- ccx-crossorg-222
- tests
- ccx-crossorg-280
- tests
- csb_org_crossrepo_tracing
- ccx-config-trace-010/tests
- ccx-dep-trace-001/tests
- ccx-dep-trace-102/tests
- ccx-dep-trace-116/tests
- ccx-dep-trace-123/tests
- ccx-dep-trace-133/tests
- ccx-dep-trace-171
- tests
- ccx-dep-trace-172
- tests
- ccx-dep-trace-173
- tests
- ccx-dep-trace-273
- tests
- csb_org_crossrepo
- ccx-dep-trace-106/tests
- ccx-dep-trace-253
- tests
- ccx-dep-trace-254
- tests
- ccx-dep-trace-258
- tests
- ccx-dep-trace-260
- tests
- ccx-dep-trace-264
- tests
- ccx-dep-trace-265
- tests
- ccx-dep-trace-266
- tests
- ccx-dep-trace-267
- tests
- ccx-dep-trace-268
- tests
- ccx-dep-trace-271
- tests
- csb_org_domain
- ccx-domain-073/tests
- ccx-domain-101/tests
- ccx-domain-112/tests
- ccx-domain-129/tests
- ccx-domain-137/tests
- ccx-domain-140/tests
- ccx-domain-155
- tests
- ccx-domain-156
- tests
- ccx-domain-157
- tests
- ccx-domain-159
- tests
- ccx-domain-160
- tests
- csb_org_incident
- ccx-incident-032/tests
- ccx-incident-034/tests
- ccx-incident-037/tests
- ccx-incident-108/tests
- ccx-incident-110/tests
- ccx-incident-113/tests
- ccx-incident-125/tests
- ccx-incident-131/tests
- ccx-incident-139/tests
- ccx-incident-144
- tests
- ccx-incident-145
- tests
- ccx-incident-148
- tests
- ccx-incident-149
- tests
- csb_org_migration
- ccx-migration-026/tests
- ccx-migration-107/tests
- ccx-migration-114/tests
- ccx-migration-117/tests
- ccx-migration-195
- tests
- ccx-migration-196
- tests
- ccx-migration-197
- tests
- ccx-migration-198
- tests
- ccx-migration-199
- tests
- ccx-migration-200
- tests
- ccx-migration-201
- tests
- ccx-migration-202
- tests
- ccx-migration-203
- tests
- ccx-migration-204
- tests
- ccx-migration-205
- tests
- ccx-migration-206
- tests
- ccx-migration-207
- tests
- ccx-migration-274
- tests
- ccx-migration-275
- tests
- ccx-migration-276
- tests
- ccx-migration-277
- tests
- ccx-migration-278
- tests
- ccx-migration-279
- tests
- csb_org_onboarding
- ccx-onboard-103/tests
- ccx-onboard-109/tests
- ccx-onboard-134/tests
- ccx-onboard-search-207
- ccx-onboard-search-208
- ccx-onboard-search-209
- ccx-onboard-search-210
- ccx-onboard-search-211
- ccx-onboard-search-212
- ccx-onboard-search-213
- ccx-onboard-search-214
- csb_org_org
- ccx-agentic-081/tests
- ccx-agentic-082/tests
- ccx-agentic-083/tests
- ccx-agentic-122/tests
- ccx-agentic-127/tests
- ccx-agentic-223
- tests
- ccx-agentic-224
- tests
- ccx-agentic-225
- tests
- ccx-agentic-229
- tests
- ccx-agentic-232
- tests
- csb_org_platform
- ccx-platform-091
- tests
- ccx-platform-094/tests
- ccx-platform-100/tests
- ccx-platform-104/tests
- ccx-platform-238
- tests
- ccx-platform-239
- tests
- ccx-platform-240
- tests
- ccx-platform-241
- tests
- ccx-platform-242
- tests
- ccx-platform-243
- tests
- ccx-platform-248
- tests
- ccx-platform-249
- tests
- csb_org_security
- ccx-vuln-remed-014/tests
- ccx-vuln-remed-105/tests
- ccx-vuln-remed-111/tests
- ccx-vuln-remed-126/tests
- ccx-vuln-remed-130/tests
- ccx-vuln-remed-135/tests
- ccx-vuln-remed-141/tests
- ccx-vuln-remed-161
- tests
- ccx-vuln-remed-162
- tests
- ccx-vuln-remed-167
- tests
- ccx-vuln-remed-169/tests
- ccx-vuln-remed-281
- tests
- ccx-vuln-remed-282
- tests
- csb_sdlc_debug
- ansible-galaxy-tar-regression-prove-001/tests
- flipt-auth-cookie-regression-prove-001/tests
- linux-acpi-backlight-fault-001/tests
- linux-hda-intel-suspend-fault-001/tests
- linux-iwlwifi-subdevice-fault-001/tests
- linux-nfs-inode-revalidate-fault-001/tests
- qutebrowser-adblock-cache-regression-prove-001/tests
- qutebrowser-darkmode-threshold-regression-prove-001/tests
- qutebrowser-hsv-color-regression-prove-001/tests
- qutebrowser-url-regression-prove-001/tests
- teleport-ssh-regression-prove-001/tests
- tidb-query-plan-regression-debug-001/tests
- vuls-oval-regression-prove-001/tests
- csb_sdlc_design
- camel-routing-arch-001/tests
- django-orm-query-arch-001/tests
- django-pre-validate-signal-design-001/tests
- django-rate-limit-design-001/tests
- elasticsearch-shard-alloc-design-001/tests
- etcd-grpc-api-upgrade-001/tests
- flink-checkpoint-arch-001/tests
- flipt-protobuf-metadata-design-001/tests
- k8s-crd-lifecycle-arch-001/tests
- kafka-flink-streaming-arch-001/tests
- postgres-query-exec-arch-001/tests
- csb_sdlc_document
- docgen-inline-002/tests
- envoy-arch-doc-gen-001/tests
- envoy-migration-doc-gen-001/tests
- godot-gdscript-api-docgen-001/tests
- grpc-channel-api-docgen-001/tests
- k8s-apiserver-doc-gen-001/tests
- k8s-applyconfig-doc-gen-001/tests
- k8s-clientgo-doc-gen-001/tests
- k8s-fairqueuing-doc-gen-001/tests
- k8s-kubelet-cm-doc-gen-001/tests
- kafka-api-doc-gen-001/tests
- csb_sdlc_feature
- bustub-hyperloglog-impl-001/tests
- camel-fix-protocol-feat-001/tests
- cilium-policy-audit-logger-feat-001
- tests
- cilium-policy-quota-feat-001
- tests
- curl-http3-priority-feat-001
- tests
- django-rate-limit-middleware-feat-001
- tests
- envoy-custom-header-filter-feat-001
- tests
- envoy-grpc-server-impl-001/tests
- flink-pricing-window-feat-001/tests
- k8s-noschedule-taint-feat-001/tests
- k8s-runtime-object-impl-001/tests
- numpy-rolling-median-feat-001
- tests
- pandas-merge-asof-indicator-feat-001
- tests
- postgres-copy-csv-header-feat-001
- tests
- prometheus-silence-bulk-api-feat-001
- tests
- pytorch-gradient-noise-feat-001
- tests
- servo-css-container-query-feat-001
- tests
- servo-scrollend-event-feat-001/tests
- strata-cds-tranche-feat-001/tests
- tensorrt-mxfp4-quant-feat-001/tests
- terraform-compact-diff-fmt-feat-001
- tests
- vscode-custom-fold-region-feat-001
- tests
- vscode-stale-diagnostics-feat-001/tests
- csb_sdlc_fix
- ansible-abc-imports-fix-001/tests
- django-modelchoice-fk-fix-001/tests
- django-select-for-update-fix-001/tests
- element-web-roomheaderbuttons-can-crash-fix-001/tests
- element-web-unread-indicators-diverge-fix-001/tests
- envoy-dfp-host-leak-fix-001/tests
- envoy-udp-proxy-cds-fix-001/tests
- flink-window-late-data-fix-001/tests
- k8s-dra-scheduler-event-fix-001/tests
- kafka-producer-bufpool-fix-001/tests
- nodebb-notif-dropdown-fix-001/tests
- nodebb-plugin-validate-fix-001/tests
- pytorch-cudnn-version-fix-001/tests
- pytorch-dynamo-keyerror-fix-001/tests
- pytorch-release-210-fix-001/tests
- pytorch-relu-gelu-fusion-fix-001/tests
- pytorch-tracer-graph-cleanup-fix-001/tests
- teleport-users-can-delete-fix-001/tests
- terraform-plan-null-unknown-fix-001/tests
- webclients-api-error-metrics-fix-001
- environment
- tests
- webclients-contact-import-fails-fix-001
- environment
- tests
- webclients-excessive-repeated-api-fix-001
- environment
- tests
- webclients-implement-proper-punycode-fix-001
- environment
- tests
- webclients-incorrect-rendering-content-fix-001
- environment
- tests
- csb_sdlc_refactor
- beam-pipeline-builder-refac-001/tests
- cilium-endpoint-manager-refac-001
- tests
- django-request-factory-refac-001
- tests
- envoy-listener-manager-refac-001
- tests
- flipt-dep-refactor-001/tests
- flipt-flagexists-refactor-001/tests
- istio-discovery-server-refac-001
- tests
- k8s-score-normalizer-refac-001/tests
- kafka-batch-accumulator-refac-001/tests
- kubernetes-scheduler-profile-refac-001
- tests
- numpy-array-dispatch-refac-001
- tests
- pandas-index-engine-refac-001
- tests
- prometheus-query-engine-refac-001
- tests
- python-http-class-naming-refac-001/tests
- pytorch-optimizer-foreach-refac-001
- tests
- roslyn-symbol-resolver-refac-001/tests
- strata-fx-european-refac-001/tests
- terraform-eval-context-refac-001
- tests
- csb_sdlc_secure
- ceph-rgw-auth-secure-001/tests
- curl-cve-triage-001/tests
- django-audit-trail-implement-001/tests
- django-cross-team-boundary-001/tests
- django-legacy-dep-vuln-001/tests
- django-repo-scoped-access-001/tests
- django-role-based-access-001/tests
- django-sensitive-file-exclusion-001/tests
- flipt-degraded-context-fix-001/tests
- flipt-repo-scoped-access-001/tests
- k8s-rbac-auth-audit-001
- tests
- kafka-sasl-auth-audit-001/tests
- typescript-type-narrowing-secure-001/tests
- csb_sdlc_test
- aspnetcore-code-review-001/tests
- bazel-starlark-eval-test-001/tests
- calcom-code-review-001/tests
- envoy-code-review-001/tests
- ghost-code-review-001/tests
- kafka-security-review-001/tests
- terraform-code-review-001/tests
- test-coverage-gap-001/tests
- test-coverage-gap-002/tests
- test-unitgen-go-001/tests
- test-unitgen-py-001/tests
- vscode-code-review-001/tests
- csb_sdlc_understand
- argocd-arch-orient-001/tests
- cilium-ebpf-fault-qa-001/tests
- cilium-project-orient-001/tests
- clickhouse-mergetree-arch-understand-001/tests
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
3,021 files changed
+37862
-4107
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
75 | 75 | | |
76 | 76 | | |
77 | 77 | | |
78 | | - | |
79 | 78 | | |
80 | | - | |
81 | | - | |
82 | | - | |
83 | | - | |
84 | | - | |
85 | | - | |
86 | | - | |
87 | | - | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
88 | 88 | | |
89 | 89 | | |
90 | 90 | | |
91 | 91 | | |
92 | 92 | | |
93 | 93 | | |
94 | 94 | | |
95 | | - | |
96 | | - | |
97 | | - | |
98 | | - | |
99 | | - | |
100 | | - | |
101 | | - | |
102 | | - | |
103 | | - | |
104 | | - | |
105 | | - | |
106 | | - | |
107 | | - | |
108 | | - | |
| 95 | + | |
| 96 | + | |
| 97 | + | |
| 98 | + | |
| 99 | + | |
| 100 | + | |
| 101 | + | |
| 102 | + | |
| 103 | + | |
| 104 | + | |
| 105 | + | |
| 106 | + | |
| 107 | + | |
| 108 | + | |
109 | 109 | | |
110 | 110 | | |
111 | 111 | | |
| |||
135 | 135 | | |
136 | 136 | | |
137 | 137 | | |
138 | | - | |
139 | 138 | | |
140 | | - | |
141 | | - | |
142 | | - | |
143 | | - | |
144 | | - | |
145 | | - | |
146 | | - | |
147 | | - | |
148 | | - | |
149 | | - | |
150 | | - | |
151 | | - | |
152 | | - | |
153 | | - | |
154 | | - | |
155 | | - | |
156 | | - | |
157 | | - | |
158 | | - | |
| 139 | + | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
| 158 | + | |
159 | 159 | | |
160 | 160 | | |
161 | 161 | | |
| |||
169 | 169 | | |
170 | 170 | | |
171 | 171 | | |
172 | | - | |
| 172 | + | |
173 | 173 | | |
174 | 174 | | |
175 | 175 | | |
| |||
293 | 293 | | |
294 | 294 | | |
295 | 295 | | |
296 | | - | |
| 296 | + | |
297 | 297 | | |
298 | 298 | | |
299 | | - | |
| 299 | + | |
300 | 300 | | |
301 | 301 | | |
302 | 302 | | |
| |||
312 | 312 | | |
313 | 313 | | |
314 | 314 | | |
315 | | - | |
316 | 315 | | |
317 | | - | |
318 | | - | |
319 | | - | |
320 | | - | |
321 | | - | |
322 | | - | |
323 | | - | |
| 316 | + | |
| 317 | + | |
| 318 | + | |
| 319 | + | |
| 320 | + | |
| 321 | + | |
| 322 | + | |
| 323 | + | |
324 | 324 | | |
325 | 325 | | |
326 | 326 | | |
| |||
0 commit comments