Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
310 commits
Select commit Hold shift + click to select a range
f5580d7
autoresearch: flatten near-cap carrier into four unique blocks
Mar 22, 2026
02b6b24
autoresearch: record accepted four-block carrier run
Mar 22, 2026
a27de6b
autoresearch: add final-tail smear gate on four-block carrier
Mar 22, 2026
6674adb
Revert "autoresearch: add final-tail smear gate on four-block carrier"
Mar 22, 2026
14be60e
autoresearch: note ar5090-20260322-212617 reverted four-block smear gate
Mar 22, 2026
83273d4
autoresearch: fund five-block carrier with lower-rank q
Mar 22, 2026
c7b9a9a
Revert "autoresearch: fund five-block carrier with lower-rank q"
Mar 22, 2026
06847ec
autoresearch: note ar5090-20260322-213922 reverted five-block low-rank q
Mar 22, 2026
586b8a2
autoresearch: add final-tail neighbor gate on four-block carrier
Mar 22, 2026
1baa824
Revert "autoresearch: add final-tail neighbor gate on four-block carr…
Mar 22, 2026
e591b5a
autoresearch: note ar5090-20260322-215320 reverted four-block neighbo…
Mar 22, 2026
0dde988
autoresearch: lower four-block curriculum floor to 512
Mar 22, 2026
a3144ef
Revert "autoresearch: lower four-block curriculum floor to 512"
Mar 22, 2026
0c1e6ff
autoresearch: note ar5090-20260322-220650 reverted four-block 512-flo…
Mar 22, 2026
dee5558
autoresearch: add small final-tail canon on four-block carrier
Mar 22, 2026
8199d02
Revert "autoresearch: add small final-tail canon on four-block carrier"
Mar 22, 2026
8d70d2f
autoresearch: note ar5090-20260322-222117 reverted four-block small c…
Mar 22, 2026
75f155b
autoresearch: trade one four-block layer for wider three-block carrier
Mar 22, 2026
4470fbe
autoresearch: record accepted three-block wider carrier run
Mar 22, 2026
58c74fe
autoresearch: lift three-block carrier to 960 context
Mar 22, 2026
8651b1b
Revert "autoresearch: lift three-block carrier to 960 context"
Mar 22, 2026
25cf9d6
autoresearch: note ar5090-20260322-225147 reverted 960 context
Mar 22, 2026
84be14c
autoresearch: reallocate tail q rank on three-block carrier
Mar 22, 2026
1bd9c2f
Revert "autoresearch: reallocate tail q rank on three-block carrier"
Mar 22, 2026
0355468
autoresearch: note ar5090-20260322-230738 reverted tail q skew
Mar 22, 2026
977d04b
autoresearch: front-load tail mlp width on three-block carrier
Mar 22, 2026
f187e59
autoresearch: record accepted front-loaded three-block run
Mar 22, 2026
f8473ad
autoresearch: front-load tail q rank on three-block carrier
Mar 22, 2026
fcfc3ec
Revert "autoresearch: front-load tail q rank on three-block carrier"
Mar 22, 2026
62ed9c7
autoresearch: note ar5090-20260322-234027 reverted front-loaded tail …
Mar 22, 2026
ec6d2c2
autoresearch: add first-tail local mixer on three-block carrier
Mar 22, 2026
a8323d3
Revert "autoresearch: add first-tail local mixer on three-block carrier"
Mar 23, 2026
46f7cdf
autoresearch: note ar5090-20260322-235547 reverted first-tail local m…
Mar 23, 2026
8bb62b5
autoresearch: lift front-loaded three-block carrier to 832 context
Mar 23, 2026
cb8091b
Revert "autoresearch: lift front-loaded three-block carrier to 832 co…
Mar 23, 2026
1c0e2c1
autoresearch: note ar5090-20260323-000939 reverted 832 context
Mar 23, 2026
fffdc90
autoresearch: collapse front-loaded three-block carrier to two blocks
Mar 23, 2026
7385a96
Revert "autoresearch: collapse front-loaded three-block carrier to tw…
Mar 23, 2026
ce06b59
autoresearch: note ar5090-20260323-002359 reverted two-block carrier
Mar 23, 2026
f38cd67
autoresearch: recompress final-tail q on three-block carrier
Mar 23, 2026
b9c3a5a
Revert "autoresearch: recompress final-tail q on three-block carrier"
Mar 23, 2026
107f453
autoresearch: note ar5090-20260323-003828 reverted final-tail q recom…
Mar 23, 2026
a518d45
autoresearch: add final-tail smear gate on three-block carrier
Mar 23, 2026
48bbae0
Revert "autoresearch: add final-tail smear gate on three-block carrier"
Mar 23, 2026
baab51e
autoresearch: note ar5090-20260323-005311 reverted three-block smear …
Mar 23, 2026
3f224c8
autoresearch: later-skew non-final tail q on three-block carrier
Mar 23, 2026
3af5bf7
Revert "autoresearch: later-skew non-final tail q on three-block carr…
Mar 23, 2026
870e165
autoresearch: note ar5090-20260323-010809 reverted later-skew tail q
Mar 23, 2026
75723b6
autoresearch: extend 640 warmup on three-block carrier
Mar 23, 2026
200d98e
Revert "autoresearch: extend 640 warmup on three-block carrier"
Mar 23, 2026
aca3388
autoresearch: note ar5090-20260323-012159 reverted longer warmup
Mar 23, 2026
5856b23
autoresearch: delay remaining mlp fake quant on three-block carrier
Mar 23, 2026
7ad51b3
Revert "autoresearch: delay remaining mlp fake quant on three-block c…
Mar 23, 2026
eeeec43
autoresearch: note ar5090-20260323-013616 reverted delayed mlp qat
Mar 23, 2026
18332bb
autoresearch: add final-tail neighbor mixer on three-block carrier
Mar 23, 2026
38052d3
Revert "autoresearch: add final-tail neighbor mixer on three-block ca…
Mar 23, 2026
bd31548
autoresearch: note ar5090-20260323-014954 reverted final-tail neighbo…
Mar 23, 2026
14bd22d
autoresearch: add final-tail prefix-kv cache on three-block carrier
Mar 23, 2026
2b4e720
Revert "autoresearch: add final-tail prefix-kv cache on three-block c…
Mar 23, 2026
643597d
autoresearch: note ar5090-20260323-020429 reverted prefix-kv cache
Mar 23, 2026
2ddc6af
autoresearch: narrow d-model on front-loaded three-block carrier
Mar 23, 2026
8a92b22
Revert "autoresearch: narrow d-model on front-loaded three-block carr…
Mar 23, 2026
e354965
autoresearch: note ar5090-20260323-022247 reverted d-model downshift
Mar 23, 2026
59a1caa
autoresearch: move three-block float mlp anchor to middle tail
Mar 23, 2026
1134d13
Revert "autoresearch: move three-block float mlp anchor to middle tail"
Mar 23, 2026
20e04a0
autoresearch: note ar5090-20260323-023952 reverted middle-tail float …
Mar 23, 2026
9e98f8d
autoresearch: add shifted-token mlp on three-block carrier
Mar 23, 2026
03ee484
Revert "autoresearch: add shifted-token mlp on three-block carrier"
Mar 23, 2026
08a8d9c
autoresearch: note ar5090-20260323-025420 reverted shifted-token mlp
Mar 23, 2026
b1cd68d
autoresearch: compress non-final q rank on front-loaded three-block c…
Mar 23, 2026
09309ee
Revert "autoresearch: compress non-final q rank on front-loaded three…
Mar 23, 2026
4b40848
autoresearch: note ar5090-20260323-030917 reverted non-final q compre…
Mar 23, 2026
7ea056a
Add more competitive priors
Mar 23, 2026
0b6f1f9
Merge remote-tracking branch 'origin/main'
Mar 23, 2026
eb38b60
autoresearch: lift reclaimed-compute two-block carrier to 1024 curric…
Mar 23, 2026
5de3c9e
aggressive-autoresearch xsa_neural_cache: add top-layer cross-window …
Mar 23, 2026
cd56f63
Revert "aggressive-autoresearch xsa_neural_cache: add top-layer cross…
Mar 23, 2026
18d0315
Revert "autoresearch: lift reclaimed-compute two-block carrier to 102…
Mar 23, 2026
40555ec
autoresearch: note ar5090-20260323-032459 reverted two-block 1024 cur…
Mar 23, 2026
b56432b
aggressive-autoresearch xsa_neural_cache: add prefix-summary tail cache
Mar 23, 2026
2edcc7d
Revert "aggressive-autoresearch xsa_neural_cache: add prefix-summary …
Mar 23, 2026
1fb81ca
autoresearch: note ar5090-20260323-034415 reverted prefix-summary tai…
Mar 23, 2026
dbf63cc
aggressive-autoresearch xsa_neural_cache: add recent-window tail cache
Mar 23, 2026
1ff79bd
aggressive-autoresearch low_rank_q_reallocation: move full-rank q anc…
Mar 23, 2026
1dee121
Revert "aggressive-autoresearch xsa_neural_cache: add recent-window t…
Mar 23, 2026
1582048
Revert "aggressive-autoresearch low_rank_q_reallocation: move full-ra…
Mar 23, 2026
c1eb8bf
autoresearch: note ar5090-20260323-040048 reverted moved q anchor
Mar 23, 2026
46140ef
aggressive-autoresearch xsa_neural_cache: replace final tail attentio…
Mar 23, 2026
657f986
aggressive-autoresearch near_full_budget_carrier: complete xsa revert…
Mar 23, 2026
14aee2c
Revert "aggressive-autoresearch xsa_neural_cache: replace final tail …
Mar 23, 2026
b9d682a
Revert "aggressive-autoresearch near_full_budget_carrier: complete xs…
Mar 23, 2026
7a10478
autoresearch: note ar5090-20260323-042051 reverted xsa-revert repair
Mar 23, 2026
6130fd8
aggressive-autoresearch xsa_neural_cache: move local XSA cache to mid…
Mar 23, 2026
1a5f5e4
Revert "aggressive-autoresearch xsa_neural_cache: move local XSA cach…
Mar 23, 2026
5d5f6f0
autoresearch: note ar5090-20260323-043344 reverted middle-tail xsa cache
Mar 23, 2026
33ac719
aggressive-autoresearch xsa_neural_cache: add final-tail cross-window…
Mar 23, 2026
4c198cd
aggressive-autoresearch near_full_budget_carrier: stop auto-applying xsa
Mar 23, 2026
1317499
Revert "aggressive-autoresearch xsa_neural_cache: add final-tail cros…
Mar 23, 2026
2402bd0
Revert "aggressive-autoresearch near_full_budget_carrier: stop auto-a…
Mar 23, 2026
825cc55
aggressive-autoresearch near_full_budget_carrier: restore no-auto-xsa…
Mar 23, 2026
6c2132a
autoresearch: note ar5090-20260323-045148 reverted xsa auto-apply repair
Mar 23, 2026
1bb149f
Revert "aggressive-autoresearch near_full_budget_carrier: restore no-…
Mar 23, 2026
45462ce
aggressive-autoresearch late_selective_quantization: keep full final …
Mar 23, 2026
f1f95b4
Revert "aggressive-autoresearch late_selective_quantization: keep ful…
Mar 23, 2026
dbeffb2
aggressive-autoresearch near_full_budget_carrier: drop dangling auto-…
Mar 23, 2026
8c86b95
aggressive-autoresearch batch_or_context_curriculum: full-768 mid-bat…
Mar 23, 2026
1298d30
aggressive-autoresearch late_selective_quantization: keep final atten…
Mar 23, 2026
e746bba
Revert "aggressive-autoresearch batch_or_context_curriculum: full-768…
Mar 23, 2026
601eec3
autoresearch: note ar5090-20260323-051721 reverted full-768 mid-batch…
Mar 23, 2026
e7d33a7
Revert "aggressive-autoresearch late_selective_quantization: keep fin…
Mar 23, 2026
c09b370
aggressive-autoresearch near_full_budget_carrier: revisit four-block …
Mar 23, 2026
8981316
aggressive-autoresearch late_selective_quantization: keep first tail …
Mar 23, 2026
64b1f78
Revert "aggressive-autoresearch late_selective_quantization: keep fir…
Mar 23, 2026
d799df7
Revert "aggressive-autoresearch near_full_budget_carrier: revisit fou…
Mar 23, 2026
7d04af0
autoresearch: note ar5090-20260323-053527 reverted four-block near-ca…
Mar 23, 2026
c215f5f
aggressive-autoresearch late_selective_quantization: disable penultim…
Mar 23, 2026
72a4ba5
aggressive-autoresearch late_selective_quantization: keep clean final…
Mar 23, 2026
d15b81f
Revert "aggressive-autoresearch late_selective_quantization: keep cle…
Mar 23, 2026
97d69af
autoresearch: note ar5090-20260323-055344 reverted clean final tail k…
Mar 23, 2026
6a0006c
aggressive-autoresearch late_selective_quantization: keep final atten…
Mar 23, 2026
3d39b3a
aggressive-autoresearch low_rank_q_reallocation: later-bias full-cont…
Mar 23, 2026
da7cad7
Revert "aggressive-autoresearch late_selective_quantization: keep fin…
Mar 23, 2026
f671945
Revert "aggressive-autoresearch low_rank_q_reallocation: later-bias f…
Mar 23, 2026
9b4b79b
autoresearch: note ar5090-20260323-060956 reverted later-biased full-…
Mar 23, 2026
76c8069
aggressive-autoresearch near_full_budget_carrier: constant-sum front-…
Mar 23, 2026
89236e0
Revert "aggressive-autoresearch near_full_budget_carrier: constant-su…
Mar 23, 2026
fa9edc3
autoresearch: note ar5090-20260323-063115 reverted constant-sum front…
Mar 23, 2026
7153bb8
aggressive-autoresearch near_full_budget_carrier: d480 four-block fro…
Mar 23, 2026
56a0f1e
Revert "aggressive-autoresearch near_full_budget_carrier: d480 four-b…
Mar 23, 2026
4eb9e80
autoresearch: note ar5090-20260323-065339 reverted d480 four-block fr…
Mar 23, 2026
0a8b739
aggressive-autoresearch near_full_budget_carrier: remove stale full-c…
Mar 23, 2026
97d0b21
Revert "aggressive-autoresearch near_full_budget_carrier: remove stal…
Mar 23, 2026
b867e90
autoresearch: note ar5090-20260323-070801 reverted stale auto-apply r…
Mar 23, 2026
402345d
aggressive-autoresearch context_curriculum: restore 640 warmup on bra…
Mar 23, 2026
2d9fc6a
autoresearch: sync accepted state ar5090-20260323-081800
Mar 23, 2026
e716093
autoresearch: note ar5090-20260323-081800 accepted branch-tip curricu…
Mar 23, 2026
bd5e987
aggressive-autoresearch canon_or_neighbor_mixer: add final-tail neigh…
Mar 23, 2026
8c1215a
autoresearch: accept ar5090-20260323-074105 final-tail neighbor mixer
Mar 23, 2026
a4a766e
aggressive-autoresearch smarter_local_token_module: make branch-tip n…
Mar 23, 2026
4f0a202
Revert "aggressive-autoresearch smarter_local_token_module: make bran…
Mar 23, 2026
d2b263e
autoresearch: note ar5090-20260323-075541 reverted two-scale neighbor…
Mar 23, 2026
12d4ede
Merge remote-tracking branch 'origin/main' into aggressive-autoresear…
Mar 23, 2026
f5a3304
aggressive-autoresearch late_selective_quantization: restore penultim…
Mar 23, 2026
b0bbc68
Revert "aggressive-autoresearch late_selective_quantization: restore …
Mar 23, 2026
601604b
autoresearch: note ar5090-20260323-080853 reverted penultimate-tail m…
Mar 23, 2026
ab93f92
aggressive-autoresearch low_rank_q_reallocation: add second tail neig…
Mar 23, 2026
a160d52
Revert "aggressive-autoresearch low_rank_q_reallocation: add second t…
Mar 23, 2026
417d0b7
aggressive-autoresearch low_rank_q_reallocation: fund penultimate mix…
Mar 23, 2026
0093b9b
Revert "aggressive-autoresearch low_rank_q_reallocation: fund penulti…
Mar 23, 2026
90d6145
aggressive-autoresearch near_full_budget_carrier: collapse branch-tip…
Mar 23, 2026
35a3769
aggressive-autoresearch low_rank_q_reallocation: fund four-block dept…
Mar 23, 2026
f806b2d
Revert "aggressive-autoresearch near_full_budget_carrier: collapse br…
Mar 23, 2026
8e1efc6
autoresearch: note ar5090-20260323-090316 reverted two-block front-lo…
Mar 23, 2026
d2037a1
Revert "aggressive-autoresearch low_rank_q_reallocation: fund four-bl…
Mar 23, 2026
586b0ea
aggressive-autoresearch near_full_budget_carrier: front-load more wid…
Mar 23, 2026
212030a
aggressive-autoresearch low_rank_q_reallocation: front-load three-blo…
Mar 23, 2026
d2efd7f
Revert "aggressive-autoresearch near_full_budget_carrier: front-load …
Mar 23, 2026
1a88f68
autoresearch: note ar5090-20260323-091900 reverted extra final-tail f…
Mar 23, 2026
8845f38
Revert "aggressive-autoresearch low_rank_q_reallocation: front-load t…
Mar 23, 2026
1c583c8
aggressive-autoresearch low_rank_q_reallocation: widen early tail fro…
Mar 23, 2026
a225fea
aggressive-autoresearch low_rank_q_reallocation: fund final-tail mlp …
Mar 23, 2026
544ac22
Revert "aggressive-autoresearch low_rank_q_reallocation: fund final-t…
Mar 23, 2026
448c6f0
autoresearch: note ar5090-20260323-093709 reverted first-tail q funde…
Mar 23, 2026
c72a676
Revert "aggressive-autoresearch low_rank_q_reallocation: widen early …
Mar 23, 2026
0679351
aggressive-autoresearch canon_or_neighbor_mixer: canonicalize final-t…
Mar 23, 2026
ae1300d
Revert "aggressive-autoresearch canon_or_neighbor_mixer: canonicalize…
Mar 23, 2026
08bdb74
aggressive-autoresearch low_rank_q_reallocation: move full-rank q for…
Mar 23, 2026
ad59e86
autoresearch: note ar5090-20260323-095044 reverted canonicalized neig…
Mar 23, 2026
c1d5209
aggressive-autoresearch smeargate_or_ttt: swap branch-tip neighbor mi…
Mar 23, 2026
0d50fc8
Merge remote-tracking branch 'origin/main' into aggressive-autoresear…
Mar 23, 2026
0f31ebb
Revert "aggressive-autoresearch smeargate_or_ttt: swap branch-tip nei…
Mar 23, 2026
f82db50
autoresearch: note ar5090-20260323-100724 reverted branch-tip smear g…
Mar 23, 2026
149d890
aggressive-autoresearch low_rank_q_reallocation: fund 1024 curriculum…
Mar 23, 2026
a6fe520
Revert "aggressive-autoresearch low_rank_q_reallocation: fund 1024 cu…
Mar 23, 2026
b93fe68
aggressive-autoresearch low_rank_q_reallocation: spend branch-tip q o…
Mar 23, 2026
d1d807e
Revert "aggressive-autoresearch low_rank_q_reallocation: spend branch…
Mar 23, 2026
1ae1331
aggressive-autoresearch split_local_context_families: replace branch-…
Mar 23, 2026
458f725
Revert "aggressive-autoresearch split_local_context_families: replace…
Mar 23, 2026
2889b00
aggressive-autoresearch split_local_context_families: causal ttt with…
Mar 23, 2026
355bdd7
Revert "aggressive-autoresearch split_local_context_families: causal …
Mar 23, 2026
d54c1cc
aggressive-autoresearch split_local_context_families: replace branch-…
Mar 23, 2026
1c809d7
Revert "aggressive-autoresearch split_local_context_families: replace…
Mar 23, 2026
cb9669e
aggressive-autoresearch split_local_context_families: penultimate ada…
Mar 23, 2026
bf7d141
Revert "aggressive-autoresearch split_local_context_families: penulti…
Mar 23, 2026
6844dfc
aggressive-autoresearch split_local_context_families: switch branch-t…
Mar 23, 2026
e24ac25
Revert "aggressive-autoresearch split_local_context_families: switch …
Mar 23, 2026
bb95adb
aggressive-autoresearch split_local_context_families: broaden smear b…
Mar 23, 2026
1ea8fe5
Revert "aggressive-autoresearch split_local_context_families: broaden…
Mar 23, 2026
bd11096
aggressive-autoresearch compute_aware_context_curriculum: push branch…
Mar 23, 2026
b7a5502
Revert "aggressive-autoresearch compute_aware_context_curriculum: pus…
Mar 23, 2026
a95a90d
aggressive-autoresearch compute_aware_context_curriculum: 512-to-1024…
Mar 23, 2026
95b4dbe
Revert "aggressive-autoresearch compute_aware_context_curriculum: 512…
Mar 23, 2026
27c2931
aggressive-autoresearch compute_aware_context_curriculum: 512-to-896 …
Mar 23, 2026
28d1773
Revert "aggressive-autoresearch compute_aware_context_curriculum: 512…
Mar 23, 2026
44b9d95
aggressive-autoresearch compute_aware_context_curriculum: cheaper q c…
Mar 23, 2026
906e51d
Revert "aggressive-autoresearch compute_aware_context_curriculum: che…
Mar 23, 2026
e56fbe9
aggressive-autoresearch compute_aware_context_curriculum: 768-to-1024…
Mar 23, 2026
8b5cd2d
Revert "aggressive-autoresearch compute_aware_context_curriculum: 768…
Mar 23, 2026
a9a6111
aggressive-autoresearch compute_aware_context_curriculum: 768-to-1536…
Mar 23, 2026
2a3041b
Revert "aggressive-autoresearch compute_aware_context_curriculum: 768…
Mar 23, 2026
1d3acc6
aggressive-autoresearch smarter_local_token_module: penultimate facto…
Mar 23, 2026
bed0735
Revert "aggressive-autoresearch smarter_local_token_module: penultima…
Mar 23, 2026
a88fce4
aggressive-autoresearch smarter_local_token_module: exact bigram logi…
Mar 23, 2026
ecbf4a4
Revert "aggressive-autoresearch smarter_local_token_module: exact big…
Mar 23, 2026
d045b08
aggressive-autoresearch smarter_local_token_module: two-scale history…
Mar 23, 2026
5ce6e9f
Revert "aggressive-autoresearch smarter_local_token_module: two-scale…
Mar 23, 2026
c873662
aggressive-autoresearch smarter_local_token_module: split exact-hash …
Mar 23, 2026
5f72cf4
Revert "aggressive-autoresearch smarter_local_token_module: split exa…
Mar 23, 2026
2442c10
aggressive-autoresearch smarter_local_token_module: factorized histor…
Mar 23, 2026
4df3bc7
Revert "aggressive-autoresearch smarter_local_token_module: factorize…
Mar 23, 2026
17804a1
aggressive-autoresearch smarter_local_token_module: final hidden two-…
Mar 23, 2026
8d2f864
Revert "aggressive-autoresearch smarter_local_token_module: final hid…
Mar 23, 2026
b1d8093
aggressive-autoresearch quant_optimized_checkpoint_soup: no-qat post-…
Mar 23, 2026
419b355
Revert "aggressive-autoresearch quant_optimized_checkpoint_soup: no-q…
Mar 23, 2026
4af3ae9
aggressive-autoresearch quant_optimized_checkpoint_soup: late-qat fin…
Mar 23, 2026
47de854
Revert "aggressive-autoresearch quant_optimized_checkpoint_soup: late…
Mar 23, 2026
6ffadaf
aggressive-autoresearch quant_optimized_checkpoint_soup: dense no-qat…
Mar 23, 2026
96df81a
Revert "aggressive-autoresearch quant_optimized_checkpoint_soup: dens…
Mar 23, 2026
6ef6b5c
aggressive-autoresearch quant_optimized_checkpoint_soup: dense late-q…
Mar 23, 2026
ab5444a
Merge remote-tracking branch 'origin/main' into aggressive-autoresear…
Mar 23, 2026
ca0364e
aggressive-autoresearch quant_optimized_checkpoint_soup: depth12 ptq …
Mar 23, 2026
21173b8
Revert "aggressive-autoresearch quant_optimized_checkpoint_soup: dept…
Mar 23, 2026
58ec3fa
aggressive-autoresearch quant_optimized_checkpoint_soup: low-rank-q p…
Mar 23, 2026
7b04402
Revert "aggressive-autoresearch quant_optimized_checkpoint_soup: low-…
Mar 23, 2026
e78229d
aggressive-autoresearch quant_optimized_checkpoint_soup: aggressive d…
Mar 23, 2026
22a9206
Revert "aggressive-autoresearch quant_optimized_checkpoint_soup: aggr…
Mar 23, 2026
cf5f60a
aggressive-autoresearch localized_canon_inserts: bottom canon insert …
Mar 23, 2026
fd49ed1
Revert "aggressive-autoresearch localized_canon_inserts: bottom canon…
Mar 23, 2026
a76f52f
aggressive-autoresearch localized_canon_inserts: pre-final xsa canon …
Mar 23, 2026
f709053
Revert aggressive-autoresearch localized_canon_inserts: pre-final xsa…
Mar 23, 2026
64fb212
aggressive-autoresearch localized_canon_inserts: dual canon twelve-la…
Mar 23, 2026
2064cde
Revert aggressive-autoresearch localized_canon_inserts: dual canon tw…
Mar 23, 2026
bf02b3f
aggressive-autoresearch localized_canon_inserts: upper-tail canon ele…
Mar 23, 2026
c2904b0
Revert "aggressive-autoresearch localized_canon_inserts: upper-tail c…
Mar 23, 2026
f436479
aggressive-autoresearch localized_canon_inserts: low-rank-q canon twe…
Mar 23, 2026
7a229a2
Revert "aggressive-autoresearch localized_canon_inserts: low-rank-q c…
Mar 23, 2026
e3444d3
aggressive-autoresearch localized_canon_inserts: distributed canon sh…
Mar 23, 2026
771ab25
Revert "aggressive-autoresearch localized_canon_inserts: distributed …
Mar 23, 2026
63a00af
aggressive-autoresearch selective_int4_mlp: middle-band int4 eleven-l…
Mar 23, 2026
c50a799
Revert "aggressive-autoresearch selective_int4_mlp: middle-band int4 …
Mar 23, 2026
c8626f4
aggressive-autoresearch selective_int4_mlp: middle-band int4 twelve-l…
Mar 23, 2026
d90e8ce
Revert "aggressive-autoresearch selective_int4_mlp: middle-band int4 …
Mar 23, 2026
b7b4a58
aggressive-autoresearch selective_int4_mlp: lower-middle int4 history…
Mar 23, 2026
bbd7c9a
Revert "aggressive-autoresearch selective_int4_mlp: lower-middle int4…
Mar 23, 2026
50623fb
aggressive-autoresearch selective_int4_mlp: int4 low-rank-q carrier
Mar 23, 2026
06b665d
Revert "aggressive-autoresearch selective_int4_mlp: int4 low-rank-q c…
Mar 23, 2026
a4236e6
aggressive-autoresearch selective_int4_mlp: int4 xsa upper tail
Mar 23, 2026
c353f7c
Revert "aggressive-autoresearch selective_int4_mlp: int4 xsa upper tail"
Mar 23, 2026
7aad0b9
aggressive-autoresearch selective_int4_mlp: int4 long-context tail mo…
Mar 23, 2026
43114e8
Revert "aggressive-autoresearch selective_int4_mlp: int4 long-context…
Mar 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions .autoresearch/notes.md

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions .autoresearch/session.json
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
{
"accepted_artifact_bytes": 7770766,
"accepted_commit": "09535e0bf240ac86aa853ded331f797fe1560688",
"accepted_commit_short": "09535e0",
"accepted_results_path": "runs/autoresearch_5090/runs/ar5090-20260321-143929/results.json",
"accepted_run_id": "ar5090-20260321-143929",
"accepted_val_bpb": 1.5584812964313723,
"accepted_artifact_bytes": 15732516,
"accepted_commit": "bd5e987a00b69a9285e97534d376b5cad759e4d6",
"accepted_commit_short": "bd5e987",
"accepted_results_path": "runs/autoresearch_5090/runs/ar5090-20260323-074105/results.json",
"accepted_run_id": "ar5090-20260323-074105",
"accepted_val_bpb": 1.5254880853616,
"baseline_artifact_bytes": 7174403,
"baseline_results_path": "/workspace/autoresearch-parameter-golf/runs/autoresearch_5090/runs/baseline_5090_5min_rerun/results.json",
"baseline_run_id": "baseline_5090_5min_rerun",
"baseline_val_bpb": 1.5701376649442806,
"created_at_unix": 1774089643.238404,
"current_branch": "autoresearch/20260319-181724",
"current_branch": "aggressive-autoresearch/20260323-083105",
"current_experiment": null,
"latest_artifact_bytes": 7911607,
"latest_artifact_bytes": 15752024,
"latest_decision": "reverted",
"latest_results_path": "/workspace/autoresearch-parameter-golf/runs/autoresearch_5090/runs/ar5090-20260321-154606/results.json",
"latest_run_id": "ar5090-20260321-154606",
"latest_results_path": "/workspace/autoresearch-parameter-golf/runs/autoresearch_5090/runs/ar5090-20260323-100724/results.json",
"latest_run_id": "ar5090-20260323-100724",
"latest_status": "success",
"latest_val_bpb": 1.5672447217875,
"latest_val_bpb": 1.5398915807983933,
"repo_root": "/workspace/autoresearch-parameter-golf",
"schema_version": "pgolf.autoresearch_session.v1",
"search_policy": {
Expand Down Expand Up @@ -58,5 +58,5 @@
},
"state_dir": "/workspace/autoresearch-parameter-golf/.autoresearch",
"status": "ready",
"updated_at_unix": 1774108535.5078502
"updated_at_unix": 1774260881.4368758
}
12 changes: 12 additions & 0 deletions COMPETITIVE_PRIORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,15 @@ Treat the search as a story board, not as free-form mutation roulette:
- If recent history is narrow, switch stories before refining.
- If a story requires a missing self-contained module in `train.py`, implement the module instead of downgrading the story to a precision micro-tune.
- Keep module-writing stories isolated so the result is interpretable and easy to revert.

## Hot Steering Override
For the next search block, prioritize macro-topology diversity over local precision refinements.

Required macro axes:
- shared_layers vs recurrence_loops
- tail_layers
- d_model
- seq_len / curriculum
- local-module family

Do not spend more than two additional runs inside the same outer carrier topology without changing one of the axes above.
125 changes: 125 additions & 0 deletions configs/promoted/autoresearch_5090_best.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
{
"artifact_bundle_name": "submission_bundle",
"benchmark_eval_repeats": 1,
"benchmark_only": false,
"benchmark_train_steps": 3,
"checkpoint_every": 0,
"counted_code_paths": [
"train.py"
],
"deterministic": true,
"eval_first_step": false,
"evaluate_only": false,
"grad_accum_steps": 2,
"iterations": 200000,
"lawa_last_n_steps": 100,
"load_artifact_path": null,
"log_every": 10,
"max_wallclock_seconds": 300.0,
"metrics_jsonl_path": null,
"model": {
"adapter_alpha": 16.0,
"adapter_rank": 8,
"adapter_targets": [
"q",
"k",
"v",
"attn_out",
"mlp_in",
"mlp_out"
],
"attn_dropout": 0.0,
"attn_fake_quant_during_train": false,
"d_model": 512,
"emb_init_std": 0.02,
"fake_quant_during_train": true,
"fake_quant_start_step": 160,
"final_tail_mlp_fake_quant_during_train": false,
"final_tail_neighbor_mixer": true,
"final_tail_q_low_rank": 0,
"final_tail_smear_gate": false,
"logit_softcap": 30.0,
"mlp_mult": 2,
"non_recurrent_mlp_hidden_bonus": 3584,
"num_heads": 8,
"num_kv_heads": 4,
"penultimate_tail_mlp_fake_quant_during_train": false,
"q_low_rank": 128,
"qk_gain_init": 1.0,
"recurrence_loops": 0,
"resid_dropout": 0.0,
"rope_base": 10000.0,
"seq_len": 768,
"shared_layers": 0,
"shared_mlp_fake_quant_during_train": null,
"shared_mlp_hidden_bonus": 0,
"shared_q_low_rank": null,
"stem_layers": 0,
"tail_layers": 3,
"tail_mlp_hidden_bonuses": [
3840,
3584,
3328
],
"tie_embeddings": true,
"vocab_size": 1024
},
"optim": {
"adam_eps": 1e-08,
"beta1": 0.9,
"beta2": 0.95,
"embed_lr": 0.003,
"grad_clip_norm": 1.0,
"head_lr": 0.003,
"matrix_lr": 0.012,
"min_lr_scale": 0.1,
"muon_backend_steps": 5,
"muon_momentum": 0.95,
"scalar_lr": 0.003,
"warmdown_steps": 80,
"warmup_steps": 20,
"weight_decay": 0.0
},
"output_dir": "./runs/autoresearch_5090/promoted_current",
"quant": {
"clip_percentile": 96.5,
"keep_float_max_numel": 65536,
"keep_float_name_patterns": [
"norm",
"scale",
"gain",
"adapter",
"lm_head",
"tok_emb.weight",
"tail.2.mlp.",
"tail.2.attn.q_proj.weight",
"tail.2.attn.out_proj.weight"
],
"keep_float_store_dtype": "torch.float16",
"low_bit_bits": 6,
"low_bit_name_patterns": [
"mlp.fc.weight",
"mlp.proj.weight"
],
"scale_store_dtype": "torch.float16",
"zlib_level": 9
},
"results_tsv_path": "./runs/autoresearch_5090/results.tsv",
"resume_from": null,
"run_name": null,
"save_final_quantized": true,
"seed": 1337,
"tensorboard_log_dir": null,
"tokenizer_path": "./data/tokenizers/fineweb_1024_bpe.model",
"train_batch_tokens": 61440,
"train_pattern": "./data/datasets/fineweb10B_sp1024/fineweb_train_*.bin",
"train_phase_only": false,
"train_seq_len_min": 640,
"train_seq_len_warmup_steps": 160,
"use_compile": false,
"use_lawa": false,
"val_batch_tokens": 122880,
"val_every": 0,
"val_pattern": "./data/datasets/fineweb10B_sp1024/fineweb_val_*.bin",
"verify_export_reload": false
}
125 changes: 125 additions & 0 deletions configs/promoted/autoresearch_h100_1x_best.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
{
"artifact_bundle_name": "submission_bundle",
"benchmark_eval_repeats": 1,
"benchmark_only": false,
"benchmark_train_steps": 3,
"checkpoint_every": 100,
"counted_code_paths": [
"train.py"
],
"deterministic": true,
"eval_first_step": false,
"evaluate_only": false,
"grad_accum_steps": 2,
"iterations": 200000,
"lawa_last_n_steps": 100,
"load_artifact_path": null,
"log_every": 20,
"max_wallclock_seconds": 600.0,
"metrics_jsonl_path": null,
"model": {
"adapter_alpha": 16.0,
"adapter_rank": 8,
"adapter_targets": [
"q",
"k",
"v",
"attn_out",
"mlp_in",
"mlp_out"
],
"attn_dropout": 0.0,
"attn_fake_quant_during_train": false,
"d_model": 512,
"emb_init_std": 0.02,
"fake_quant_during_train": true,
"fake_quant_start_step": 160,
"final_tail_mlp_fake_quant_during_train": false,
"final_tail_neighbor_mixer": true,
"final_tail_q_low_rank": 0,
"final_tail_smear_gate": false,
"logit_softcap": 30.0,
"mlp_mult": 2,
"non_recurrent_mlp_hidden_bonus": 3584,
"num_heads": 8,
"num_kv_heads": 4,
"penultimate_tail_mlp_fake_quant_during_train": false,
"q_low_rank": 128,
"qk_gain_init": 1.0,
"recurrence_loops": 0,
"resid_dropout": 0.0,
"rope_base": 10000.0,
"seq_len": 768,
"shared_layers": 0,
"shared_mlp_fake_quant_during_train": null,
"shared_mlp_hidden_bonus": 0,
"shared_q_low_rank": null,
"stem_layers": 0,
"tail_layers": 3,
"tail_mlp_hidden_bonuses": [
3840,
3584,
3328
],
"tie_embeddings": true,
"vocab_size": 1024
},
"optim": {
"adam_eps": 1e-08,
"beta1": 0.9,
"beta2": 0.95,
"embed_lr": 0.003,
"grad_clip_norm": 1.0,
"head_lr": 0.003,
"matrix_lr": 0.012,
"min_lr_scale": 0.1,
"muon_backend_steps": 5,
"muon_momentum": 0.95,
"scalar_lr": 0.003,
"warmdown_steps": 80,
"warmup_steps": 20,
"weight_decay": 0.0
},
"output_dir": "./runs/runpod_h100_1x_10min/promoted",
"quant": {
"clip_percentile": 96.5,
"keep_float_max_numel": 65536,
"keep_float_name_patterns": [
"norm",
"scale",
"gain",
"adapter",
"lm_head",
"tok_emb.weight",
"tail.2.mlp.",
"tail.2.attn.q_proj.weight",
"tail.2.attn.out_proj.weight"
],
"keep_float_store_dtype": "torch.float16",
"low_bit_bits": 6,
"low_bit_name_patterns": [
"mlp.fc.weight",
"mlp.proj.weight"
],
"scale_store_dtype": "torch.float16",
"zlib_level": 9
},
"results_tsv_path": "./runs/runpod_h100_1x_10min/results.tsv",
"resume_from": null,
"run_name": null,
"save_final_quantized": true,
"seed": 1337,
"tensorboard_log_dir": null,
"tokenizer_path": "./data/tokenizers/fineweb_1024_bpe.model",
"train_batch_tokens": 61440,
"train_pattern": "./data/datasets/fineweb10B_sp1024/fineweb_train_*.bin",
"train_phase_only": false,
"train_seq_len_min": 640,
"train_seq_len_warmup_steps": 160,
"use_compile": false,
"use_lawa": true,
"val_batch_tokens": 122880,
"val_every": 100,
"val_pattern": "./data/datasets/fineweb10B_sp1024/fineweb_val_*.bin",
"verify_export_reload": true
}
Loading