From ab0a677522fe0fa8e11be7e4e72e03126a67709e Mon Sep 17 00:00:00 2001 From: Alexey Sharov Date: Wed, 4 Mar 2026 13:50:12 +0700 Subject: [PATCH 1/4] save --- db/kv/mdbx/kv_mdbx.go | 11 + db/kv/tables.go | 136 ++++---- db/migrations/migrations.go | 1 + db/rawdb/rawdbhelpers/rawdbhelpers.go | 4 +- db/state/domain_test.go | 15 +- db/state/gc_test.go | 11 +- db/state/history.go | 414 ++++++++++------------- db/state/history_key_txnum_range.go | 104 +----- db/state/history_key_txnum_range_test.go | 44 +-- db/state/history_stream.go | 284 ++++------------ db/state/history_test.go | 172 ++++------ db/state/statecfg/state_schema.go | 40 +-- db/state/statecfg/statecfg.go | 20 +- 13 files changed, 437 insertions(+), 819 deletions(-) diff --git a/db/kv/mdbx/kv_mdbx.go b/db/kv/mdbx/kv_mdbx.go index 48cc6e0f919..bb0cead251c 100644 --- a/db/kv/mdbx/kv_mdbx.go +++ b/db/kv/mdbx/kv_mdbx.go @@ -988,6 +988,17 @@ func (tx *MdbxTx) DropTable(bucket string) error { return tx.dropEvenIfBucketIsNotDeprecated(bucket) } +// DropTableForced drops a DBI and all its data unconditionally, regardless of IsDeprecated status. +// Intended for use in schema migrations where table flags need to change (e.g. adding/removing DupSort). +// After dropping, the table is re-created with the current config flags on the next DB open. +func DropTableForced(tx kv.RwTx, name string) error { + mdbxTx, ok := tx.(*MdbxTx) + if !ok { + return fmt.Errorf("DropTableForced: expected *MdbxTx, got %T", tx) + } + return mdbxTx.dropEvenIfBucketIsNotDeprecated(name) +} + func (tx *MdbxTx) ExistsTable(bucket string) (bool, error) { if cfg, ok := tx.db.buckets[bucket]; ok { return cfg.DBI != NonExistingDBI, nil diff --git a/db/kv/tables.go b/db/kv/tables.go index 55f78fb7a3e..1fdc84ef1be 100644 --- a/db/kv/tables.go +++ b/db/kv/tables.go @@ -144,35 +144,35 @@ const ( // Domains/History/InvertedIndices // Constants have "Tbl" prefix, to avoid collision with actual Domain names // This constants is very rarely used in APP, but Domain/History/Idx names are widely used - TblAccountVals = "AccountVals" - TblAccountHistoryKeys = "AccountHistoryKeys" - TblAccountHistoryVals = "AccountHistoryVals" - TblAccountIdx = "AccountIdx" - - TblStorageVals = "StorageVals" - TblStorageHistoryKeys = "StorageHistoryKeys" - TblStorageHistoryVals = "StorageHistoryVals" - TblStorageIdx = "StorageIdx" - - TblCodeVals = "CodeVals" - TblCodeHistoryKeys = "CodeHistoryKeys" - TblCodeHistoryVals = "CodeHistoryVals" - TblCodeIdx = "CodeIdx" - - TblCommitmentVals = "CommitmentVals" - TblCommitmentHistoryKeys = "CommitmentHistoryKeys" - TblCommitmentHistoryVals = "CommitmentHistoryVals" - TblCommitmentIdx = "CommitmentIdx" - - TblReceiptVals = "ReceiptVals" - TblReceiptHistoryKeys = "ReceiptHistoryKeys" - TblReceiptHistoryVals = "ReceiptHistoryVals" - TblReceiptIdx = "ReceiptIdx" - - TblRCacheVals = "ReceiptCacheVals" - TblRCacheHistoryKeys = "ReceiptCacheHistoryKeys" - TblRCacheHistoryVals = "ReceiptCacheHistoryVals" - TblRCacheIdx = "ReceiptCacheIdx" + TblAccountVals = "AccountVals" + TblAccountHistoryData = "AccountHistoryKeys" // DataTable: txNum+key → prev_value (non-DupSort, sequential writes) + TblAccountHistoryInvIdx = "AccountHistoryVals" // InvIndexTable: key → txNum (DupSort, no embedded value) + TblAccountIdx = "AccountIdx" + + TblStorageVals = "StorageVals" + TblStorageHistoryData = "StorageHistoryKeys" + TblStorageHistoryInvIdx = "StorageHistoryVals" + TblStorageIdx = "StorageIdx" + + TblCodeVals = "CodeVals" + TblCodeHistoryData = "CodeHistoryKeys" + 
TblCodeHistoryInvIdx = "CodeHistoryVals" + TblCodeIdx = "CodeIdx" + + TblCommitmentVals = "CommitmentVals" + TblCommitmentHistoryData = "CommitmentHistoryKeys" + TblCommitmentHistoryInvIdx = "CommitmentHistoryVals" + TblCommitmentIdx = "CommitmentIdx" + + TblReceiptVals = "ReceiptVals" + TblReceiptHistoryData = "ReceiptHistoryKeys" + TblReceiptHistoryInvIdx = "ReceiptHistoryVals" + TblReceiptIdx = "ReceiptIdx" + + TblRCacheVals = "ReceiptCacheVals" + TblRCacheHistoryData = "ReceiptCacheHistoryKeys" + TblRCacheHistoryInvIdx = "ReceiptCacheHistoryVals" + TblRCacheIdx = "ReceiptCacheIdx" TblLogAddressKeys = "LogAddressKeys" TblLogAddressIdx = "LogAddressIdx" @@ -186,7 +186,7 @@ const ( // Prune progress of execution: tableName -> [8bytes of invStep]latest pruned key // Could use table constants `Tbl{Account,Storage,Code,Commitment}Keys` for domains - // corresponding history tables `Tbl{Account,Storage,Code,Commitment}HistoryKeys` for history + // corresponding history tables `Tbl{Account,Storage,Code,Commitment}HistoryData` for history // and `Tbl{Account,Storage,Code,Commitment}Idx` for inverted indices TblPruningProgress = "PruningProgress" // tableName -> txTo;last pruned val @@ -342,33 +342,33 @@ var ChaindataTables = []string{ BorWitnesses, BorWitnessSizes, TblAccountVals, - TblAccountHistoryKeys, - TblAccountHistoryVals, + TblAccountHistoryData, + TblAccountHistoryInvIdx, TblAccountIdx, TblStorageVals, - TblStorageHistoryKeys, - TblStorageHistoryVals, + TblStorageHistoryData, + TblStorageHistoryInvIdx, TblStorageIdx, TblCodeVals, - TblCodeHistoryKeys, - TblCodeHistoryVals, + TblCodeHistoryData, + TblCodeHistoryInvIdx, TblCodeIdx, TblCommitmentVals, - TblCommitmentHistoryKeys, - TblCommitmentHistoryVals, + TblCommitmentHistoryData, + TblCommitmentHistoryInvIdx, TblCommitmentIdx, TblReceiptVals, - TblReceiptHistoryKeys, - TblReceiptHistoryVals, + TblReceiptHistoryData, + TblReceiptHistoryInvIdx, TblReceiptIdx, TblRCacheVals, - TblRCacheHistoryKeys, - TblRCacheHistoryVals, + TblRCacheHistoryData, + TblRCacheHistoryInvIdx, TblRCacheIdx, TblLogAddressKeys, @@ -521,31 +521,33 @@ var ChaindataTablesCfg = TableCfg{ DupToLen: 28, }, - TblAccountVals: {Flags: DupSort}, - TblAccountHistoryKeys: {Flags: DupSort}, - TblAccountHistoryVals: {Flags: DupSort}, - TblAccountIdx: {Flags: DupSort}, - - TblStorageVals: {Flags: DupSort}, - TblStorageHistoryKeys: {Flags: DupSort}, - TblStorageHistoryVals: {Flags: DupSort}, - TblStorageIdx: {Flags: DupSort}, - - TblCodeHistoryKeys: {Flags: DupSort}, - TblCodeIdx: {Flags: DupSort}, - - TblCommitmentVals: {Flags: DupSort}, - TblCommitmentHistoryKeys: {Flags: DupSort}, - TblCommitmentHistoryVals: {Flags: DupSort}, - TblCommitmentIdx: {Flags: DupSort}, - - TblReceiptVals: {Flags: DupSort}, - TblReceiptHistoryKeys: {Flags: DupSort}, - TblReceiptHistoryVals: {Flags: DupSort}, - TblReceiptIdx: {Flags: DupSort}, - - TblRCacheHistoryKeys: {Flags: DupSort}, - TblRCacheIdx: {Flags: DupSort}, + TblAccountVals: {Flags: DupSort}, + TblAccountHistoryData: {}, // DataTable: non-DupSort (txNum+key → value, sequential writes) + TblAccountHistoryInvIdx: {Flags: DupSort}, // InvIndexTable: key → txNum + TblAccountIdx: {Flags: DupSort}, + + TblStorageVals: {Flags: DupSort}, + TblStorageHistoryData: {}, + TblStorageHistoryInvIdx: {Flags: DupSort}, + TblStorageIdx: {Flags: DupSort}, + + TblCodeHistoryData: {}, // was DupSort (TblCodeHistoryKeys) + TblCodeHistoryInvIdx: {Flags: DupSort}, // was non-DupSort (TblCodeHistoryVals) + TblCodeIdx: {Flags: DupSort}, + + TblCommitmentVals: 
{Flags: DupSort}, + TblCommitmentHistoryData: {}, + TblCommitmentHistoryInvIdx: {Flags: DupSort}, + TblCommitmentIdx: {Flags: DupSort}, + + TblReceiptVals: {Flags: DupSort}, + TblReceiptHistoryData: {}, + TblReceiptHistoryInvIdx: {Flags: DupSort}, + TblReceiptIdx: {Flags: DupSort}, + + TblRCacheHistoryData: {}, // was DupSort (TblRCacheHistoryKeys) + TblRCacheHistoryInvIdx: {Flags: DupSort}, // was non-DupSort (TblRCacheHistoryVals) + TblRCacheIdx: {Flags: DupSort}, TblLogAddressKeys: {Flags: DupSort}, TblLogAddressIdx: {Flags: DupSort}, diff --git a/db/migrations/migrations.go b/db/migrations/migrations.go index fb86baabe46..b7d9a4d9345 100644 --- a/db/migrations/migrations.go +++ b/db/migrations/migrations.go @@ -53,6 +53,7 @@ var migrations = map[kv.Label][]Migration{ dbcfg.ChainDB: { dbSchemaVersion5, ResetStageTxnLookup, + HistoryTableFormatChange, }, dbcfg.TxPoolDB: {}, dbcfg.SentryDB: {}, diff --git a/db/rawdb/rawdbhelpers/rawdbhelpers.go b/db/rawdb/rawdbhelpers/rawdbhelpers.go index 27b2fc21f58..5af32f3eae3 100644 --- a/db/rawdb/rawdbhelpers/rawdbhelpers.go +++ b/db/rawdb/rawdbhelpers/rawdbhelpers.go @@ -23,8 +23,8 @@ import ( ) func IdxStepsCountV3(tx kv.Tx, stepSize uint64) float64 { - fst, _ := kv.FirstKey(tx, kv.TblAccountHistoryKeys) - lst, _ := kv.LastKey(tx, kv.TblAccountHistoryKeys) + fst, _ := kv.FirstKey(tx, kv.TblAccountHistoryData) + lst, _ := kv.LastKey(tx, kv.TblAccountHistoryData) if len(fst) > 0 && len(lst) > 0 { fstTxNum := binary.BigEndian.Uint64(fst) lstTxNum := binary.BigEndian.Uint64(lst) diff --git a/db/state/domain_test.go b/db/state/domain_test.go index 29f87cb239c..f8b0f26c6b2 100644 --- a/db/state/domain_test.go +++ b/db/state/domain_test.go @@ -1481,7 +1481,6 @@ func TestDomain_GetAfterAggregation(t *testing.T) { require.NoError(err) defer tx.Rollback() - d.HistoryLargeValues = false d.History.Compression = seg.CompressNone //seg.CompressKeys | seg.CompressVals d.Compression = seg.CompressNone //seg.CompressKeys | seg.CompressVals d.FilenameBase = kv.CommitmentDomain.String() @@ -1556,7 +1555,6 @@ func TestDomainRange(t *testing.T) { require.NoError(err) defer tx.Rollback() - d.HistoryLargeValues = false d.History.Compression = seg.CompressNone // seg.CompressKeys | seg.CompressVals d.Compression = seg.CompressNone // seg.CompressKeys | seg.CompressVals d.FilenameBase = kv.AccountsDomain.String() @@ -1674,7 +1672,6 @@ func TestDomain_CanScanPruneAfterAggregation(t *testing.T) { require.NoError(t, err) defer tx.Rollback() - d.HistoryLargeValues = false d.History.Compression = seg.CompressKeys | seg.CompressVals d.Compression = seg.CompressKeys | seg.CompressVals d.FilenameBase = kv.CommitmentDomain.String() @@ -1771,7 +1768,6 @@ func TestDomain_CanHashPruneAfterAggregation(t *testing.T) { require.NoError(t, err) defer tx.Rollback() - d.HistoryLargeValues = false d.History.Compression = seg.CompressKeys | seg.CompressVals d.Compression = seg.CompressKeys | seg.CompressVals d.FilenameBase = kv.CommitmentDomain.String() @@ -1869,7 +1865,6 @@ func TestDomain_PruneAfterAggregation(t *testing.T) { require.NoError(t, err) defer tx.Rollback() - d.HistoryLargeValues = false d.History.Compression = seg.CompressNone //seg.CompressKeys | seg.CompressVals d.Compression = seg.CompressNone //seg.CompressKeys | seg.CompressVals @@ -2016,7 +2011,6 @@ func TestDomain_PruneProgress(t *testing.T) { require.NoError(t, err) defer rwTx.Rollback() - d.HistoryLargeValues = false d.History.Compression = seg.CompressKeys | seg.CompressVals d.Compression = seg.CompressKeys | 
seg.CompressVals @@ -2542,7 +2536,6 @@ func TestDomainContext_findShortenedKey(t *testing.T) { require.NoError(t, err) defer tx.Rollback() - d.HistoryLargeValues = true domainRoTx := d.BeginFilesRo() defer domainRoTx.Close() writer := domainRoTx.NewWriter() @@ -2624,7 +2617,6 @@ func TestCanBuild(t *testing.T) { require.NoError(t, err) defer tx.Rollback() - d.HistoryLargeValues = true domainRoTx := d.BeginFilesRo() defer domainRoTx.Close() @@ -2662,24 +2654,23 @@ func TestTraceKey_SmallVals(t *testing.T) { if testing.Short() { t.Skip("slow test") } - testTraceKey(t, false) + testTraceKey(t) } func TestTraceKey_LargeVals(t *testing.T) { if testing.Short() { t.Skip("slow test") } - testTraceKey(t, true) + testTraceKey(t) } -func testTraceKey(t *testing.T, largeVals bool) { +func testTraceKey(t *testing.T) { logger := log.New() logEvery := time.NewTicker(30 * time.Second) defer logEvery.Stop() ctx := context.Background() db, d := testDbAndDomain(t, logger) - d.HistoryLargeValues = largeVals txs := fillDomain(t, d, db, logger) err := db.UpdateNosync(ctx, func(tx kv.RwTx) error { diff --git a/db/state/gc_test.go b/db/state/gc_test.go index 8c0237c4456..6f05a35e725 100644 --- a/db/state/gc_test.go +++ b/db/state/gc_test.go @@ -113,14 +113,9 @@ func TestGCReadAfterRemoveFile(t *testing.T) { require.Nil(lastOnFs.decompressor) }) } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - }) + + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) } func TestDomainGCReadAfterRemoveFile(t *testing.T) { diff --git a/db/state/history.go b/db/state/history.go index 07c4b5aae33..f80c132a17d 100644 --- a/db/state/history.go +++ b/db/state/history.go @@ -38,7 +38,6 @@ import ( "github.com/erigontech/erigon/db/etl" "github.com/erigontech/erigon/db/kv" "github.com/erigontech/erigon/db/kv/bitmapdb" - mdbx2 "github.com/erigontech/erigon/db/kv/mdbx" "github.com/erigontech/erigon/db/kv/order" "github.com/erigontech/erigon/db/kv/prune" "github.com/erigontech/erigon/db/kv/stream" @@ -382,48 +381,16 @@ func (w *historyBufferedWriter) AddPrevValue(k []byte, txNum uint64, original [] } binary.BigEndian.PutUint64(w.ii.txNumBytes[:], txNum) - //defer func() { - // fmt.Printf("addPrevValue [%p;tx=%d] '%x' -> '%x'\n", w, w.ii.txNum, key1, original) - //}() - - if w.largeValues { - lk := len(k) - - w.historyKey = append(append(w.historyKey[:0], k...), w.ii.txNumBytes[:]...) - historyKey := w.historyKey[:lk+8] - - if err := w.historyVals.Collect(historyKey, original); err != nil { - return err - } - - if !w.ii.discard { - if err := w.ii.indexKeys.Collect(w.ii.txNumBytes[:], historyKey[:lk]); err != nil { - return err - } - } - return nil - } - - lk := len(k) - w.historyKey = append(append(append(w.historyKey[:0], k...), w.ii.txNumBytes[:]...), original...) - historyKey := w.historyKey[:lk+8+len(original)] - historyKey1 := historyKey[:lk] - historyVal := historyKey[lk:] - invIdxVal := historyKey[:lk] - - if len(original) > 2048 { - log.Error("History value is too large while largeValues=false", "h", w.historyValsTable, "histo", string(w.historyKey[:lk]), "len", len(original), "max", len(w.historyKey)-8-len(k)) - panic("History value is too large while largeValues=false") + // DataTable: txNum+key → prev_value (non-DupSort, sequential writes) + w.historyKey = append(append(w.historyKey[:0], w.ii.txNumBytes[:]...), k...) 
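+	// layout: [txNum(8) | key]; txNum-first ordering keeps DataTable writes sequential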
+ if err := w.dataVals.Collect(w.historyKey, original); err != nil { + return err } - if err := w.historyVals.Collect(historyKey1, historyVal); err != nil { + // InvIndexTable: key → txNum (DupSort, no embedded value) + if err := w.historyVals.Collect(k, w.ii.txNumBytes[:]); err != nil { return err } - if !w.ii.discard { - if err := w.ii.indexKeys.Collect(w.ii.txNumBytes[:], invIdxVal); err != nil { - return err - } - } return nil } @@ -432,45 +399,43 @@ func (ht *HistoryRoTx) NewWriter() *historyBufferedWriter { } type historyBufferedWriter struct { - historyVals *etl.Collector - historyKey []byte - discard bool - historyValsTable string - - // not large: - // keys: txNum -> key1+key2 - // vals: key1+key2 -> txNum + value (DupSort) - // large: - // keys: txNum -> key1+key2 - // vals: key1+key2+txNum -> value (not DupSort) - largeValues bool + historyVals *etl.Collector // InvIndexTable: key → txNum (DupSort, no embedded value) + dataVals *etl.Collector // DataTable: txNum+key → prevVal (non-DupSort, sequential writes) + historyKey []byte + discard bool + + historyValsTable string // = h.ValuesTable (InvIndexTable) + dataValsTable string // = h.KeysTable (DataTable) ii *InvertedIndexBufferedWriter } func (w *historyBufferedWriter) close() { - if w == nil { // allow dobule-close + if w == nil { // allow double-close return } w.ii.close() if w.historyVals != nil { w.historyVals.Close() } + if w.dataVals != nil { + w.dataVals.Close() + } } func (ht *HistoryRoTx) newWriter(tmpdir string, discard bool) *historyBufferedWriter { w := &historyBufferedWriter{ - discard: discard, - + discard: discard, historyKey: make([]byte, 128), - largeValues: ht.h.HistoryLargeValues, historyValsTable: ht.h.ValuesTable, - - ii: ht.iit.newWriter(tmpdir, discard), + dataValsTable: ht.h.KeysTable, + ii: ht.iit.newWriter(tmpdir, discard), } if !discard { w.historyVals = etl.NewCollectorWithAllocator(w.ii.filenameBase+".flush.hist", tmpdir, etl.SmallSortableBuffers, ht.h.logger). LogLvl(log.LvlTrace).SortAndFlushInBackground(true) + w.dataVals = etl.NewCollectorWithAllocator(w.ii.filenameBase+".flush.hist.data", tmpdir, etl.SmallSortableBuffers, ht.h.logger). + LogLvl(log.LvlTrace).SortAndFlushInBackground(true) } return w } @@ -486,6 +451,9 @@ func (w *historyBufferedWriter) Flush(ctx context.Context, tx kv.RwTx) error { if err := w.ii.Flush(ctx, tx); err != nil { return err } + if err := w.dataVals.Load(tx, w.dataValsTable, loadFunc, etl.TransformArgs{Quit: ctx.Done()}); err != nil { + return err + } if err := w.historyVals.Load(tx, w.historyValsTable, loadFunc, etl.TransformArgs{Quit: ctx.Done()}); err != nil { return err } @@ -558,28 +526,39 @@ func (h *History) collate(ctx context.Context, step kv.Step, txFrom, txTo uint64 } invIndexWriter := h.InvertedIndex.dataWriter(_efComp, true) // `Collate+Build` must be fast -> no Compression. 
Slowness here means growth of `chaindata` - keysCursor, err := roTx.CursorDupSort(h.KeysTable) + // DataTable: non-DupSort, key = txNum+key, val = prevVal (sequential by txNum) + dataCursor, err := roTx.Cursor(h.KeysTable) if err != nil { return HistoryCollation{}, fmt.Errorf("create %s history cursor: %w", h.FilenameBase, err) } - defer keysCursor.Close() + defer dataCursor.Close() binary.BigEndian.PutUint64(txKey[:], txFrom) collector := etl.NewCollectorWithAllocator(h.FilenameBase+".collate.hist", h.dirs.Tmp, etl.SmallSortableBuffers, h.logger).LogLvl(log.LvlTrace) defer collector.Close() collector.SortAndFlushInBackground(false) - for txnmb, k, err := keysCursor.Seek(txKey[:]); txnmb != nil; txnmb, k, err = keysCursor.Next() { + // valMap stores txNum+key → prevVal for lookup during loadBitmapsFunc + valMap := make(map[string][]byte) + + for k, v, err := dataCursor.Seek(txKey[:]); k != nil; k, v, err = dataCursor.Next() { if err != nil { return HistoryCollation{}, fmt.Errorf("iterate over %s history cursor: %w", h.FilenameBase, err) } - txNum := binary.BigEndian.Uint64(txnmb) + if len(k) < 8 { + break + } + txNum := binary.BigEndian.Uint64(k[:8]) if txNum >= txTo { // [txFrom; txTo) break } - if err := collector.Collect(k, txnmb); err != nil { - return HistoryCollation{}, fmt.Errorf("collect %s history key [%x]=>txn %d [%x]: %w", h.FilenameBase, k, txNum, txnmb, err) + actualKey := k[8:] + // Collect (actualKey, txNum_bytes) for bitmap building + if err := collector.Collect(actualKey, k[:8]); err != nil { + return HistoryCollation{}, fmt.Errorf("collect %s history key [%x]=>txn %d: %w", h.FilenameBase, actualKey, txNum, err) } + // Store prevVal keyed by txNum+key for value lookup in loadBitmapsFunc + valMap[string(k)] = common.Copy(v) select { case <-ctx.Done(): @@ -588,24 +567,7 @@ func (h *History) collate(ctx context.Context, step kv.Step, txFrom, txTo uint64 } } - var c kv.Cursor - var cd kv.CursorDupSort - if h.HistoryLargeValues { - c, err = roTx.Cursor(h.ValuesTable) - if err != nil { - return HistoryCollation{}, err - } - defer c.Close() - } else { - cd, err = roTx.CursorDupSort(h.ValuesTable) - if err != nil { - return HistoryCollation{}, err - } - defer cd.Close() - } - var ( - keyBuf = make([]byte, 0, 256) numBuf = make([]byte, 8) bitmap = bitmapdb.NewBitmap64() prevEf []byte @@ -642,35 +604,15 @@ func (h *History) collate(ctx context.Context, step kv.Step, txFrom, txTo uint64 seqBuilder.AddOffset(vTxNum) binary.BigEndian.PutUint64(numBuf, vTxNum) - if !h.HistoryLargeValues { - val, err := cd.SeekBothRange(prevKey, numBuf) - if err != nil { - return fmt.Errorf("seekBothRange %s history val [%x]: %w", h.FilenameBase, prevKey, err) - } - if val != nil && binary.BigEndian.Uint64(val) == vTxNum { - val = val[8:] - } else { - val = nil - } - - histKeyBuf = historyKey(vTxNum, prevKey, histKeyBuf) - if err := historyWriter.Add(histKeyBuf, val); err != nil { - return fmt.Errorf("add %s history val [%x]: %w", h.FilenameBase, prevKey, err) - } - continue - } - keyBuf = append(append(keyBuf[:0], prevKey...), numBuf...) 
- key, val, err := c.SeekExact(keyBuf) - if err != nil { - return fmt.Errorf("seekExact %s history val [%x]: %w", h.FilenameBase, key, err) - } + // Lookup prevVal from valMap using txNum+key + val := valMap[string(append(numBuf, prevKey...))] if len(val) == 0 { val = nil } histKeyBuf = historyKey(vTxNum, prevKey, histKeyBuf) if err := historyWriter.Add(histKeyBuf, val); err != nil { - return fmt.Errorf("add %s history val [%x]: %w", h.FilenameBase, key, err) + return fmt.Errorf("add %s history val [%x]: %w", h.FilenameBase, prevKey, err) } } bitmap.Clear() @@ -1065,49 +1007,9 @@ func (ht *HistoryRoTx) Prune(ctx context.Context, tx kv.RwTx, txFrom, txTo, limi } func (ht *HistoryRoTx) prune(ctx context.Context, rwTx kv.RwTx, txFrom, txTo, limit uint64, forced bool, logEvery *time.Ticker) (*InvertedIndexPruneStat, error) { - //fmt.Printf(" pruneH[%s] %t, %d-%d\n", ht.h.filenameBase, ht.CanPruneUntil(rwTx), txFrom, txTo) defer func(t time.Time) { mxPruneTookHistory.ObserveDuration(t) }(time.Now()) - var ( - // seek = make([]byte, 8, 256) - valsCDup kv.RwCursorDupSort - valsC kv.RwCursor - valsCP kv.PseudoDupSortRwCursor - err error - mode prune.StorageMode - ) - - if !ht.h.HistoryLargeValues { - valsCDup, err = rwTx.RwCursorDupSort(ht.h.ValuesTable) - if err != nil { - return nil, err - } - defer valsCDup.Close() - valsCP = valsCDup - mode = prune.PrefixValStorageMode - } else { - valsC, err = rwTx.RwCursor(ht.h.ValuesTable) - if err != nil { - return nil, err - } - defer valsC.Close() - mode = prune.KeyStorageMode - - switch c := valsC.(type) { - case *mdbx2.MdbxCursor: - valsCP = &mdbx2.MdbxCursorPseudoDupSort{MdbxCursor: c} - case *mdbx2.MdbxDupSortCursor: - valsCP = valsC.(*mdbx2.MdbxDupSortCursor) - default: - return nil, fmt.Errorf("unexpected cursor type %T for table %s", valsC, ht.h.ValuesTable) - } - } - - if !forced && ht.h.SnapshotsDisabled { - forced = true // or index.CanPrune will return false cuz no snapshots made - } - - return ht.iit.TableScanningPrune(ctx, rwTx, txFrom, txTo, limit, logEvery, forced, valsCP, &ht.h.ValuesTable, mxPruneSizeHistory, mode) + return ht.pruneHistoryTables(ctx, rwTx, txFrom, txTo, limit, logEvery) } // Prune [txFrom; txTo) @@ -1130,49 +1032,101 @@ func (ht *HistoryRoTx) OldPrune(ctx context.Context, tx kv.RwTx, txFrom, txTo, l } func (ht *HistoryRoTx) oldPrune(ctx context.Context, rwTx kv.RwTx, txFrom, txTo, limit uint64, forced bool, logEvery *time.Ticker) (*InvertedIndexPruneStat, error) { - //fmt.Printf(" pruneH[%s] %t, %d-%d\n", ht.h.filenameBase, ht.CanPruneUntil(rwTx), txFrom, txTo) defer func(t time.Time) { mxPruneTookHistory.ObserveDuration(t) }(time.Now()) - var ( - // seek = make([]byte, 8, 256) - valsCDup kv.RwCursorDupSort - valsC kv.RwCursor - valsCP kv.PseudoDupSortRwCursor - err error - mode prune.StorageMode - ) + return ht.pruneHistoryTables(ctx, rwTx, txFrom, txTo, limit, logEvery) +} + +// pruneHistoryTables prunes both the DataTable (non-DupSort, txNum+key → prevVal) +// and the InvIndexTable (DupSort, key → txNum) for the range [txFrom, txTo). +// At most `limit` distinct txNums are removed per call (0 means no limit). +func (ht *HistoryRoTx) pruneHistoryTables(ctx context.Context, rwTx kv.RwTx, txFrom, txTo, limit uint64, logEvery *time.Ticker) (*InvertedIndexPruneStat, error) { + if limit == 0 { + limit = math.MaxUint64 + } + + // DataTable: non-DupSort, key = txNum(8)+addr, value = prevVal. Sorted txNum-first. 
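+	// Because keys sort txNum-first, [txFrom, txTo) is one contiguous key range: a single Seek, then Next until txTo.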
+ dataC, err := rwTx.RwCursor(ht.h.KeysTable) + if err != nil { + return nil, err + } + defer dataC.Close() + + // InvIndexTable: DupSort, key = addr, dup_value = txNum(8). + invIdxC, err := rwTx.RwCursorDupSort(ht.h.ValuesTable) + if err != nil { + return nil, err + } + defer invIdxC.Close() + + stat := &InvertedIndexPruneStat{MinTxNum: math.MaxUint64} + var txKey, txNumBytes [8]byte + binary.BigEndian.PutUint64(txKey[:], txFrom) + + var lastTxNum = ^uint64(0) // sentinel meaning "no prior txNum seen" + var txCount uint64 // number of distinct txNums pruned so far - if !ht.h.HistoryLargeValues { - valsCDup, err = rwTx.RwCursorDupSort(ht.h.ValuesTable) + for k, _, err := dataC.Seek(txKey[:]); k != nil; k, _, err = dataC.Next() { if err != nil { - return nil, err + return nil, fmt.Errorf("prune %s history data: %w", ht.h.FilenameBase, err) } - defer valsCDup.Close() - valsCP = valsCDup - mode = prune.PrefixValStorageMode - } else { - valsC, err = rwTx.RwCursor(ht.h.ValuesTable) + if len(k) < 8 { + continue + } + txNum := binary.BigEndian.Uint64(k[:8]) + if txNum >= txTo { + break + } + + // Enforce limit: budget is per distinct txNum. + if txNum != lastTxNum { + if txCount >= limit { + break + } + txCount++ + lastTxNum = txNum + stat.MinTxNum = min(stat.MinTxNum, txNum) + stat.MaxTxNum = max(stat.MaxTxNum, txNum) + } + + key := k[8:] + binary.BigEndian.PutUint64(txNumBytes[:], txNum) + + // Remove the corresponding dup entry from InvIndexTable. + vv, err := invIdxC.SeekBothRange(key, txNumBytes[:]) if err != nil { - return nil, err + return nil, fmt.Errorf("prune %s history invIdx seek: %w", ht.h.FilenameBase, err) } - defer valsC.Close() - mode = prune.KeyStorageMode - - switch c := valsC.(type) { - case *mdbx2.MdbxCursor: - valsCP = &mdbx2.MdbxCursorPseudoDupSort{MdbxCursor: c} - case *mdbx2.MdbxDupSortCursor: - valsCP = valsC.(*mdbx2.MdbxDupSortCursor) + if len(vv) == 8 && binary.BigEndian.Uint64(vv) == txNum { + if err = invIdxC.DeleteCurrent(); err != nil { + return nil, fmt.Errorf("prune %s history invIdx delete: %w", ht.h.FilenameBase, err) + } + } + + // Remove the DataTable entry itself. + if err := dataC.DeleteCurrent(); err != nil { + return nil, fmt.Errorf("prune %s history data delete: %w", ht.h.FilenameBase, err) + } + stat.PruneCountValues++ + + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + select { + case <-logEvery.C: + ht.h.logger.Info("[snapshots] prune history", "name", ht.iit.name.String(), "pruned", stat.PruneCountValues) default: - return nil, fmt.Errorf("unexpected cursor type %T for table %s", valsC, ht.h.ValuesTable) } } - if !forced && ht.h.SnapshotsDisabled { - forced = true // or index.CanPrune will return false cuz no snapshots made + stat.PruneCountTx = txCount + if stat.MinTxNum == math.MaxUint64 { + stat.MinTxNum = 0 } - - return ht.iit.HashSeekingPrune(ctx, rwTx, txFrom, txTo, limit, logEvery, forced, valsCP, mxPruneSizeHistory, mode) + mxPruneSizeHistory.AddUint64(stat.PruneCountValues) + return stat, nil } func (ht *HistoryRoTx) Close() { @@ -1296,16 +1250,19 @@ func (ht *HistoryRoTx) HistorySeek(key []byte, txNum uint64, roTx kv.Tx) ([]byte return ht.historySeekInDB(key, txNum, roTx) } +// valsCursor opens the DataTable (non-DupSort, txNum+key → prevVal) for historySeekInDB step 2. 
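+// The cursor is opened lazily and cached on HistoryRoTx, so repeated seeks within one tx reuse it.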
 func (ht *HistoryRoTx) valsCursor(tx kv.Tx) (c kv.Cursor, err error) {
 	if ht.valsC != nil {
 		return ht.valsC, nil
 	}
-	ht.valsC, err = tx.Cursor(ht.h.ValuesTable) //nolint:gocritic
+	ht.valsC, err = tx.Cursor(ht.h.KeysTable) //nolint:gocritic
 	if err != nil {
 		return nil, err
 	}
 	return ht.valsC, nil
 }
+
+// valsCursorDup opens the InvIndexTable (DupSort, key → txNum) for historySeekInDB step 1.
 func (ht *HistoryRoTx) valsCursorDup(tx kv.Tx) (c kv.CursorDupSort, err error) {
 	if ht.valsCDup != nil {
 		return ht.valsCDup, nil
@@ -1318,39 +1275,37 @@ func (ht *HistoryRoTx) valsCursorDup(tx kv.Tx) (c kv.CursorDupSort, err error) {
 }
 
 func (ht *HistoryRoTx) historySeekInDB(key []byte, txNum uint64, tx kv.Tx) ([]byte, bool, error) {
-	if ht.h.HistoryLargeValues {
-		c, err := ht.valsCursor(tx)
-		if err != nil {
-			return nil, false, err
-		}
-		seek := make([]byte, len(key)+8)
-		copy(seek, key)
-		binary.BigEndian.PutUint64(seek[len(key):], txNum)
-
-		kAndTxNum, val, err := c.Seek(seek)
-		if err != nil {
-			return nil, false, err
-		}
-		if kAndTxNum == nil || !bytes.Equal(kAndTxNum[:len(kAndTxNum)-8], key) {
-			return nil, false, nil
-		}
-		// val == []byte{}, means key was created in this txNum and doesn't exist before.
-		return val, true, nil
-	}
-	c, err := ht.valsCursorDup(tx)
+	// Step 1: InvIndexTable (DupSort, key → txNum) — find first txNum >= requested
+	invIdxC, err := ht.valsCursorDup(tx)
 	if err != nil {
 		return nil, false, err
 	}
-	val, err := c.SeekBothRange(key, ht.encodeTs(txNum, nil))
+	// encodeTs(txNum, nil) returns 8-byte txNum in ht._bufTs
+	val, err := invIdxC.SeekBothRange(key, ht.encodeTs(txNum, nil))
 	if err != nil {
 		return nil, false, err
 	}
 	if val == nil {
 		return nil, false, nil
 	}
-	// `val == []byte{}` means key was created in this txNum and doesn't exist before.
-	v := val[8:]
-	return v, true, nil
+	txNumFound := binary.BigEndian.Uint64(val)
+
+	// Step 2: DataTable (non-DupSort, txNum+key → prevVal) — fetch actual value
+	dataC, err := ht.valsCursor(tx)
+	if err != nil {
+		return nil, false, err
+	}
+	ht._bufTs = binary.BigEndian.AppendUint64(ht._bufTs[:0], txNumFound)
+	ht._bufTs = append(ht._bufTs, key...)
+	foundK, prevVal, err := dataC.SeekExact(ht._bufTs) // nil foundK means "not found"; an empty prevVal is a valid value
+	if err != nil {
+		return nil, false, err
+	}
+	if foundK == nil {
+		return nil, false, nil
+	}
+	// prevVal == []byte{} means key was created in this txNum and doesn't exist before.
+ return prevVal, true, nil } func (ht *HistoryRoTx) RangeAsOf(ctx context.Context, startTxNum uint64, from, to []byte, asc order.By, limit int, roTx kv.Tx) (stream.KV, error) { @@ -1370,10 +1325,10 @@ func (ht *HistoryRoTx) RangeAsOf(ctx context.Context, startTxNum uint64, from, t } dbit := &HistoryRangeAsOfDB{ - largeValues: ht.h.HistoryLargeValues, - roTx: roTx, - valsTable: ht.h.ValuesTable, - from: from, toPrefix: to, limit: kv.Unlim, orderAscend: asc, + roTx: roTx, + valsTable: ht.h.ValuesTable, + dataTable: ht.h.KeysTable, + from: from, toPrefix: to, limit: kv.Unlim, orderAscend: asc, startTxNum: startTxNum, @@ -1444,11 +1399,11 @@ func (ht *HistoryRoTx) iterateChangedRecent(fromTxNum, toTxNum int, asc order.By return stream.EmptyKV, nil } s := &HistoryChangesIterDB{ - endTxNum: toTxNum, - roTx: roTx, - largeValues: ht.h.HistoryLargeValues, - valsTable: ht.h.ValuesTable, - limit: limit, + endTxNum: toTxNum, + roTx: roTx, + valsTable: ht.h.ValuesTable, + dataTable: ht.h.KeysTable, + limit: limit, } if fromTxNum >= 0 { binary.BigEndian.PutUint64(s.startTxKey[:], uint64(fromTxNum)) @@ -1603,32 +1558,7 @@ func (ht *HistoryRoTx) CompactRange(ctx context.Context, fromTxNum, toTxNum uint } func (ht *HistoryRoTx) idxRangeOnDB(key []byte, startTxNum, endTxNum int, asc order.By, limit int, roTx kv.Tx) (stream.U64, error) { - if ht.h.HistoryLargeValues { - from := make([]byte, len(key)+8) - copy(from, key) - var fromTxNum uint64 - if startTxNum >= 0 { - fromTxNum = uint64(startTxNum) - } - binary.BigEndian.PutUint64(from[len(key):], fromTxNum) - to := common.Copy(from) - toTxNum := uint64(math.MaxUint64) - if endTxNum >= 0 { - toTxNum = uint64(endTxNum) - } - binary.BigEndian.PutUint64(to[len(key):], toTxNum) - it, err := roTx.Range(ht.h.ValuesTable, from, to, asc, limit) - if err != nil { - return nil, err - } - return stream.TransformKV2U64(it, func(k, v []byte) (uint64, error) { - if len(k) < 8 { - return 0, fmt.Errorf("unexpected large key length %d", len(k)) - } - return binary.BigEndian.Uint64(k[len(k)-8:]), nil - }), nil - } - + // InvIndexTable (DupSort, key → txNum): range query on dup values var from, to []byte if startTxNum >= 0 { from = make([]byte, 8) @@ -1644,7 +1574,7 @@ func (ht *HistoryRoTx) idxRangeOnDB(key []byte, startTxNum, endTxNum int, asc or } return stream.TransformKV2U64(it, func(k, v []byte) (uint64, error) { if len(v) < 8 { - return 0, fmt.Errorf("unexpected small value length %d", len(v)) + return 0, fmt.Errorf("unexpected value length %d in history inverted index", len(v)) } return binary.BigEndian.Uint64(v), nil }), nil @@ -1676,12 +1606,12 @@ func (ht *HistoryRoTx) DebugHistoryTraceKey(ctx context.Context, key []byte, fro return nil, err } db := HistoryTraceKeyDB{ - largeValues: ht.h.HistoryLargeValues, - roTx: roTx, - valsTable: ht.h.ValuesTable, - fromTxNum: fromTxNum, - toTxNum: toTxNum, - key: key, + roTx: roTx, + valsTable: ht.h.ValuesTable, + dataTable: ht.h.KeysTable, + fromTxNum: fromTxNum, + toTxNum: toTxNum, + key: key, logger: ht.h.logger, ctx: ctx, diff --git a/db/state/history_key_txnum_range.go b/db/state/history_key_txnum_range.go index 7342950f5db..995dfd76a62 100644 --- a/db/state/history_key_txnum_range.go +++ b/db/state/history_key_txnum_range.go @@ -17,7 +17,6 @@ package state import ( - "bytes" "container/heap" "encoding/binary" @@ -80,11 +79,10 @@ func (ht *HistoryRoTx) iterateKeyTxNumRecent(fromTxNum, toTxNum int, asc order.B return stream.EmptyKU64, nil } s := &HistoryKeyTxNumIterDB{ - endTxNum: toTxNum, - roTx: roTx, - largeValues: 
ht.h.HistoryLargeValues, - valsTable: ht.h.ValuesTable, - limit: limit, + endTxNum: toTxNum, + roTx: roTx, + valsTable: ht.h.ValuesTable, + limit: limit, } if fromTxNum >= 0 { s.startTxNum = uint64(fromTxNum) @@ -189,11 +187,10 @@ func (hi *HistoryKeyTxNumIterFiles) Next() ([]byte, uint64, error) { // HistoryKeyTxNumIterDB emits (key, txNum) for every txNum at which a key changed in the DB. // Unlike HistoryChangesIterDB, it iterates ALL dups per key (not just the first one). type HistoryKeyTxNumIterDB struct { - largeValues bool - roTx kv.Tx - valsC kv.Cursor - valsCDup kv.CursorDupSort - valsTable string + roTx kv.Tx + valsCDup kv.CursorDupSort + valsTable string // InvIndexTable: key → txNum (DupSort) + limit, endTxNum int startTxNum uint64 startTxKey [8]byte // startTxNum encoded as big-endian for cursor seeks @@ -206,9 +203,6 @@ type HistoryKeyTxNumIterDB struct { } func (hi *HistoryKeyTxNumIterDB) Close() { - if hi.valsC != nil { - hi.valsC.Close() - } if hi.valsCDup != nil { hi.valsCDup.Close() } @@ -221,9 +215,6 @@ func (hi *HistoryKeyTxNumIterDB) setNext(k []byte, txNum uint64) { } func (hi *HistoryKeyTxNumIterDB) advance() error { - if hi.largeValues { - return hi.advanceLargeVals() - } return hi.advanceSmallVals() } @@ -234,7 +225,8 @@ func (hi *HistoryKeyTxNumIterDB) seekNextSmallKey(k []byte) error { return err } if v != nil { - txNum := binary.BigEndian.Uint64(v[:8]) + // InvIndexTable dup value is 8-byte txNum only (no embedded value) + txNum := binary.BigEndian.Uint64(v) if hi.endTxNum < 0 || int(txNum) < hi.endTxNum { hi.setNext(k, txNum) return nil @@ -273,7 +265,8 @@ func (hi *HistoryKeyTxNumIterDB) advanceSmallVals() error { return err } if v != nil { - txNum := binary.BigEndian.Uint64(v[:8]) + // InvIndexTable dup value is 8-byte txNum only (no embedded value) + txNum := binary.BigEndian.Uint64(v) if hi.endTxNum < 0 || int(txNum) < hi.endTxNum { hi.setNext(k, txNum) return nil @@ -286,79 +279,6 @@ func (hi *HistoryKeyTxNumIterDB) advanceSmallVals() error { return hi.seekNextSmallKey(k) } -func (hi *HistoryKeyTxNumIterDB) advanceLargeVals() error { - var err error - if hi.valsC == nil { - if hi.valsC, err = hi.roTx.Cursor(hi.valsTable); err != nil { - return err - } - k, _, err := hi.valsC.First() - if err != nil { - return err - } - if k == nil { - hi.nextKey = nil - return nil - } - seek := append(common.Copy(k[:len(k)-8]), hi.startTxKey[:]...) - k, _, err = hi.valsC.Seek(seek) - if err != nil { - return err - } - return hi.scanLargeVals(k) - } - - k, _, err := hi.valsC.Next() - if err != nil { - return err - } - if k == nil { - hi.nextKey = nil - return nil - } - if hi.nextKey != nil && !bytes.Equal(k[:len(k)-8], hi.nextKey) { - seek := append(common.Copy(k[:len(k)-8]), hi.startTxKey[:]...) - k, _, err = hi.valsC.Seek(seek) - if err != nil { - return err - } - } - return hi.scanLargeVals(k) -} - -func (hi *HistoryKeyTxNumIterDB) scanLargeVals(k []byte) error { - for k != nil { - txNum := binary.BigEndian.Uint64(k[len(k)-8:]) - if hi.endTxNum >= 0 && int(txNum) >= hi.endTxNum { - next, ok := kv.NextSubtree(k[:len(k)-8]) - if !ok { - hi.nextKey = nil - return nil - } - seek := append(next, hi.startTxKey[:]...) - var err error - k, _, err = hi.valsC.Seek(seek) - if err != nil { - return err - } - continue - } - if txNum < binary.BigEndian.Uint64(hi.startTxKey[:]) { - seek := append(common.Copy(k[:len(k)-8]), hi.startTxKey[:]...) 
- var err error - k, _, err = hi.valsC.Seek(seek) - if err != nil { - return err - } - continue - } - hi.setNext(k[:len(k)-8], txNum) - return nil - } - hi.nextKey = nil - return nil -} - func (hi *HistoryKeyTxNumIterDB) HasNext() bool { return hi.err != nil || (hi.limit != 0 && hi.nextKey != nil) } diff --git a/db/state/history_key_txnum_range_test.go b/db/state/history_key_txnum_range_test.go index 62a0931f8d8..dc9f809a28f 100644 --- a/db/state/history_key_txnum_range_test.go +++ b/db/state/history_key_txnum_range_test.go @@ -109,14 +109,9 @@ func TestHistoryKeyTxNumRange(t *testing.T) { require.NoError(err) require.Equal(expectedKeyTxNums(980, 1001, txs), collectKeyTxNumRange(t, it)) } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - }) + + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) } func TestHistoryKeyTxNumRange_EdgeCases(t *testing.T) { @@ -191,14 +186,9 @@ func TestHistoryKeyTxNumRange_EdgeCases(t *testing.T) { require.Equal(expectedKeyTxNums(6, 7, txs), collectKeyTxNumRange(t, it)) }) } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - }) + + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) } func TestHistoryKeyTxNumRange_DBOnly(t *testing.T) { @@ -231,14 +221,9 @@ func TestHistoryKeyTxNumRange_DBOnly(t *testing.T) { require.NoError(err) require.Len(collectKeyTxNumRange(t, it), 3) } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - }) + + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) } func TestHistoryKeyTxNumRange_RandomRanges(t *testing.T) { @@ -289,12 +274,7 @@ func TestHistoryKeyTxNumRange_RandomRanges(t *testing.T) { require.Equal(expected, got, "iter %d: from=%d to=%d limit=%d", i, from, to, limit) } } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - }) + + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) } diff --git a/db/state/history_stream.go b/db/state/history_stream.go index e9d52f03666..85b0bd42365 100644 --- a/db/state/history_stream.go +++ b/db/state/history_stream.go @@ -210,11 +210,11 @@ func (hi *HistoryRangeAsOfFiles) Next() ([]byte, []byte, error) { // HistoryRangeAsOfDB - returns state range at given time in history type HistoryRangeAsOfDB struct { - largeValues bool - roTx kv.Tx - valsC kv.Cursor - valsCDup kv.CursorDupSort - valsTable string + roTx kv.Tx + valsCDup kv.CursorDupSort // InvIndexTable: key → txNum + dataC kv.Cursor // DataTable: txNum+key → prevVal + valsTable string // InvIndexTable name + dataTable string // DataTable name from, toPrefix []byte orderAscend order.By @@ -233,8 +233,11 @@ type HistoryRangeAsOfDB struct { } func (hi *HistoryRangeAsOfDB) Close() { - if hi.valsC != nil { - hi.valsC.Close() + if hi.valsCDup != nil { + hi.valsCDup.Close() + } + if hi.dataC != nil { + hi.dataC.Close() } } @@ -243,67 +246,14 @@ func (hi *HistoryRangeAsOfDB) 
Trace(prefix string) *stream.TracedDuo[[]byte, []b } func (hi *HistoryRangeAsOfDB) advance() (err error) { - // not large: - // keys: txNum -> key1+key2 - // vals: key1+key2 -> txNum + value (DupSort) - // large: - // keys: txNum -> key1+key2 - // vals: key1+key2+txNum -> value (not DupSort) - if hi.largeValues { - return hi.advanceLargeVals() - } - return hi.advanceSmallVals() -} -func (hi *HistoryRangeAsOfDB) advanceLargeVals() error { var seek []byte - var err error - if hi.valsC == nil { - if hi.valsC, err = hi.roTx.Cursor(hi.valsTable); err != nil { - return err - } - firstKey, _, err := hi.valsC.Seek(hi.from) - if err != nil { - return err - } - if firstKey == nil { - hi.nextKey = nil - return nil - } - seek = append(common.Copy(firstKey[:len(firstKey)-8]), hi.startTxKey[:]...) - } else { - next, ok := kv.NextSubtree(hi.nextKey) - if !ok { - hi.nextKey = nil - return nil - } - - seek = append(next, hi.startTxKey[:]...) - } - for k, v, err := hi.valsC.Seek(seek); k != nil; k, v, err = hi.valsC.Seek(seek) { - if err != nil { - return err - } - if hi.toPrefix != nil && bytes.Compare(k[:len(k)-8], hi.toPrefix) >= 0 { - break - } - if !bytes.Equal(seek[:len(k)-8], k[:len(k)-8]) { - copy(seek[:len(k)-8], k[:len(k)-8]) - continue - } - hi.nextKey = k[:len(k)-8] - hi.nextVal = v - return nil - } - hi.nextKey = nil - return nil -} -func (hi *HistoryRangeAsOfDB) advanceSmallVals() error { - var seek []byte - var err error if hi.valsCDup == nil { if hi.valsCDup, err = hi.roTx.CursorDupSort(hi.valsTable); err != nil { return err } + if hi.dataC, err = hi.roTx.Cursor(hi.dataTable); err != nil { + return err + } seek = hi.from } else { next, ok := kv.NextSubtree(hi.nextKey) @@ -321,6 +271,7 @@ func (hi *HistoryRangeAsOfDB) advanceSmallVals() error { if hi.toPrefix != nil && bytes.Compare(k, hi.toPrefix) >= 0 { break } + // InvIndexTable: SeekBothRange(key, txNum) → first txNum >= startTxNum for this key v, err := hi.valsCDup.SeekBothRange(k, hi.startTxKey[:]) if err != nil { return err @@ -331,13 +282,20 @@ func (hi *HistoryRangeAsOfDB) advanceSmallVals() error { break } if k, _, err = hi.valsCDup.Seek(seek); err != nil { - panic(err) + return err } continue } - + txNum := binary.BigEndian.Uint64(v) + // DataTable: txNum+key → prevVal + dataKey := binary.BigEndian.AppendUint64(nil, txNum) + dataKey = append(dataKey, k...) 
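+			// point lookup; AddPrevValue writes DataTable and InvIndexTable together, so the row should exist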
+ _, val, err := hi.dataC.SeekExact(dataKey) + if err != nil { + return fmt.Errorf("HistoryRangeAsOfDB seek data for key %x txNum %d: %w", k, txNum, err) + } hi.nextKey = k - hi.nextVal = v[8:] + hi.nextVal = val return nil } hi.nextKey = nil @@ -504,11 +462,11 @@ func (hi *HistoryChangesIterFiles) Next() ([]byte, []byte, error) { } type HistoryChangesIterDB struct { - largeValues bool roTx kv.Tx - valsC kv.Cursor valsCDup kv.CursorDupSort - valsTable string + dataC kv.Cursor + valsTable string // InvIndexTable: key → txNum (DupSort) + dataTable string // DataTable: txNum+key → prevVal (non-DupSort) limit, endTxNum int startTxKey [8]byte @@ -518,103 +476,24 @@ type HistoryChangesIterDB struct { } func (hi *HistoryChangesIterDB) Close() { - if hi.valsC != nil { - hi.valsC.Close() + if hi.dataC != nil { + hi.dataC.Close() } if hi.valsCDup != nil { hi.valsCDup.Close() } } + func (hi *HistoryChangesIterDB) advance() (err error) { - // not large: - // keys: txNum -> key1+key2 - // vals: key1+key2 -> txNum + value (DupSort) - // large: - // keys: txNum -> key1+key2 - // vals: key1+key2+txNum -> value (not DupSort) - if hi.largeValues { - return hi.advanceLargeVals() - } return hi.advanceSmallVals() } -func (hi *HistoryChangesIterDB) advanceLargeVals() error { - var seek []byte - var err error - if hi.valsC == nil { - if hi.valsC, err = hi.roTx.Cursor(hi.valsTable); err != nil { - return err - } - firstKey, _, err := hi.valsC.First() - if err != nil { - return err - } - if firstKey == nil { - hi.nextKey = nil - return nil - } - seek = append(common.Copy(firstKey[:len(firstKey)-8]), hi.startTxKey[:]...) - } else { - next, ok := kv.NextSubtree(hi.nextKey) - if !ok { - hi.nextKey = nil - return nil - } - - seek = append(next, hi.startTxKey[:]...) - } - for k, v, err := hi.valsC.Seek(seek); k != nil; k, v, err = hi.valsC.Seek(seek) { - if err != nil { - return err - } - if hi.endTxNum >= 0 && int(binary.BigEndian.Uint64(k[len(k)-8:])) >= hi.endTxNum { - next, ok := kv.NextSubtree(k[:len(k)-8]) - if !ok { - hi.nextKey = nil - return nil - } - seek = append(next, hi.startTxKey[:]...) - continue - } - if hi.nextKey != nil && bytes.Equal(k[:len(k)-8], hi.nextKey) && bytes.Equal(v, hi.nextVal) { - // stuck on the same key, move to first key larger than seek - for { - k, v, err = hi.valsC.Next() - if err != nil { - return err - } - if k == nil { - hi.nextKey = nil - return nil - } - if bytes.Compare(seek[:len(seek)-8], k[:len(k)-8]) < 0 { - break - } - } - } - //fmt.Printf("[seek=%x][RET=%t] '%x' '%x'\n", seek, bytes.Equal(seek[:len(seek)-8], k[:len(k)-8]), k, v) - if !bytes.Equal(seek[:len(seek)-8], k[:len(k)-8]) /*|| int(binary.BigEndian.Uint64(k[len(k)-8:])) > hi.endTxNum */ { - if len(seek) != len(k) { - seek = append(append(seek[:0], k[:len(k)-8]...), hi.startTxKey[:]...) 
- continue - } - copy(seek[:len(k)-8], k[:len(k)-8]) - continue - } - hi.nextKey = k[:len(k)-8] - hi.nextVal = v - return nil - } - hi.nextKey = nil - return nil -} func (hi *HistoryChangesIterDB) advanceSmallVals() (err error) { var k []byte if hi.valsCDup == nil { if hi.valsCDup, err = hi.roTx.CursorDupSort(hi.valsTable); err != nil { return err } - if k, _, err = hi.valsCDup.First(); err != nil { return err } @@ -640,10 +519,23 @@ func (hi *HistoryChangesIterDB) advanceSmallVals() (err error) { } continue } - foundTxNumVal := v[:8] - if hi.endTxNum < 0 || int(binary.BigEndian.Uint64(foundTxNumVal)) < hi.endTxNum { + // InvIndexTable dup value is 8-byte txNum only (no embedded value) + txNum := binary.BigEndian.Uint64(v) + if hi.endTxNum < 0 || int(txNum) < hi.endTxNum { + if hi.dataC == nil { + if hi.dataC, err = hi.roTx.Cursor(hi.dataTable); err != nil { + return err + } + } + // DataTable: txNum+key → prevVal + dataKey := binary.BigEndian.AppendUint64(nil, txNum) + dataKey = append(dataKey, k...) + _, val, err := hi.dataC.SeekExact(dataKey) + if err != nil { + return fmt.Errorf("HistoryChangesIterDB data lookup key %x txNum %d: %w", k, txNum, err) + } hi.nextKey = k - hi.nextVal = v[8:] + hi.nextVal = val return nil } k, _, err = hi.valsCDup.NextNoDup() @@ -845,9 +737,9 @@ func (ht *HistoryTraceKeyFiles) Next() (uint64, []byte, error) { } type HistoryTraceKeyDB struct { - largeValues bool - roTx kv.Tx - valsTable string + roTx kv.Tx + valsTable string // InvIndexTable: key → txNum (DupSort) + dataTable string // DataTable: txNum+key → prevVal (non-DupSort) fromTxNum, toTxNum uint64 key []byte @@ -856,10 +748,10 @@ type HistoryTraceKeyDB struct { ctx context.Context // private - txNum uint64 - startTxNumBytes, k, v []byte - valsC kv.Cursor - valsCDup kv.CursorDupSort + txNum uint64 + k, v []byte + valsCDup kv.CursorDupSort + dataC kv.Cursor } func (ht *HistoryTraceKeyDB) init() error { @@ -867,15 +759,14 @@ func (ht *HistoryTraceKeyDB) init() error { } func (ht *HistoryTraceKeyDB) Close() { - if ht.valsC != nil { - ht.valsC.Close() - ht.valsC = nil - } - if ht.valsCDup != nil { ht.valsCDup.Close() ht.valsCDup = nil } + if ht.dataC != nil { + ht.dataC.Close() + ht.dataC = nil + } } func (ht *HistoryTraceKeyDB) HasNext() bool { @@ -896,9 +787,6 @@ func (ht *HistoryTraceKeyDB) Next() (uint64, []byte, error) { } func (ht *HistoryTraceKeyDB) advance() error { - if ht.largeValues { - return ht.advanceLargeVals() - } return ht.advanceSmallVals() } @@ -908,8 +796,8 @@ func (ht *HistoryTraceKeyDB) advanceSmallVals() error { if ht.valsCDup, err = ht.roTx.CursorDupSort(ht.valsTable); err != nil { return err } - ht.startTxNumBytes = make([]byte, 8) - binary.BigEndian.PutUint64(ht.startTxNumBytes, ht.fromTxNum) + startTxNumBytes := make([]byte, 8) + binary.BigEndian.PutUint64(startTxNumBytes, ht.fromTxNum) k, _, err := ht.valsCDup.Seek(ht.key) if err != nil { return err @@ -919,7 +807,8 @@ func (ht *HistoryTraceKeyDB) advanceSmallVals() error { return nil } ht.k = ht.key - ht.v, err = ht.valsCDup.SeekBothRange(ht.key, ht.startTxNumBytes) + // InvIndexTable dup value is 8-byte txNum only + ht.v, err = ht.valsCDup.SeekBothRange(ht.key, startTxNumBytes) if err != nil { return err } @@ -938,53 +827,20 @@ func (ht *HistoryTraceKeyDB) advanceSmallVals() error { ht.txNum = binary.BigEndian.Uint64(ht.v) if ht.txNum >= ht.toTxNum { ht.k = nil + return nil } - ht.v = ht.v[8:] - ht.v = common.Copy(ht.v) - return nil -} - -func (ht *HistoryTraceKeyDB) advanceLargeVals() error { - var err error - if ht.valsC == 
nil { - if ht.valsC, err = ht.roTx.Cursor(ht.valsTable); err != nil { - return err - } - startTxNumBytes := make([]byte, 8) - binary.BigEndian.PutUint64(startTxNumBytes, ht.fromTxNum) - seek := append([]byte{}, append(ht.key, startTxNumBytes...)...) - firstKey, v, err := ht.valsC.Seek(seek) - if err != nil { + // DataTable lookup: txNum+key → prevVal + if ht.dataC == nil { + if ht.dataC, err = ht.roTx.Cursor(ht.dataTable); err != nil { return err } - if firstKey == nil || !bytes.Equal(firstKey[:len(firstKey)-8], ht.key) { - ht.k = nil - return nil - } - ht.k = firstKey - ht.txNum = binary.BigEndian.Uint64(firstKey[len(firstKey)-8:]) - if ht.txNum >= ht.toTxNum { - ht.k = nil - return nil - } - ht.v = v - return nil } - - ht.k, ht.v, err = ht.valsC.Next() + dataKey := binary.BigEndian.AppendUint64(nil, ht.txNum) + dataKey = append(dataKey, ht.key...) + _, ht.v, err = ht.dataC.SeekExact(dataKey) if err != nil { - return err + return fmt.Errorf("HistoryTraceKeyDB data lookup key %x txNum %d: %w", ht.key, ht.txNum, err) } - if ht.k == nil || !bytes.Equal(ht.k[:len(ht.k)-8], ht.key) { - ht.k = nil - return nil - } - foundTxNum := binary.BigEndian.Uint64(ht.k[len(ht.k)-8:]) - if foundTxNum >= ht.toTxNum { - ht.k = nil - return nil - } - ht.txNum = foundTxNum - + ht.v = common.Copy(ht.v) return nil } diff --git a/db/state/history_test.go b/db/state/history_test.go index fcf537e9899..755e6311b62 100644 --- a/db/state/history_test.go +++ b/db/state/history_test.go @@ -51,7 +51,7 @@ import ( "github.com/erigontech/erigon/execution/commitment/commitmentdb" ) -func testDbAndHistory(tb testing.TB, largeValues bool, logger log.Logger) (kv.RwDB, *History) { +func testDbAndHistory(tb testing.TB, logger log.Logger) (kv.RwDB, *History) { tb.Helper() dirs := datadir.New(tb.TempDir()) db := mdbx.New(dbcfg.ChainDB, logger).InMem(tb, dirs.Chaindata).MustOpen() @@ -62,7 +62,6 @@ func testDbAndHistory(tb testing.TB, largeValues bool, logger log.Logger) (kv.Rw cfg := statecfg.Schema.AccountsDomain cfg.Hist.IiCfg.Accessors = statecfg.AccessorHashMap - cfg.Hist.HistoryLargeValues = largeValues //perf of tests cfg.Hist.IiCfg.Compression = seg.CompressNone @@ -84,12 +83,12 @@ func TestHistoryCollationsAndBuilds(t *testing.T) { t.Parallel() - runTest := func(t *testing.T, largeValues bool) { + runTest := func(t *testing.T) { t.Helper() totalTx := uint64(1000) values := generateTestData(t, length.Addr, length.Addr+length.Hash, totalTx, 100, 10) - db, h := filledHistoryValues(t, largeValues, values, log.New()) + db, h := filledHistoryValues(t, values, log.New()) defer db.Close() ctx := context.Background() @@ -172,12 +171,7 @@ func TestHistoryCollationsAndBuilds(t *testing.T) { } } - t.Run("largeValues=true", func(t *testing.T) { - runTest(t, true) - }) - t.Run("largeValues=false", func(t *testing.T) { - runTest(t, false) - }) + runTest(t) } func TestHistoryCollationBuild(t *testing.T) { @@ -312,14 +306,8 @@ func TestHistoryCollationBuild(t *testing.T) { } } } - t.Run("large_values", func(t *testing.T) { - db, h := testDbAndHistory(t, true, logger) - test(t, h, db) - }) - t.Run("small_values", func(t *testing.T) { - db, h := testDbAndHistory(t, false, logger) - test(t, h, db) - }) + db, h := testDbAndHistory(t, logger) + test(t, h, db) } func TestHistoryAfterPrune(t *testing.T) { @@ -377,14 +365,9 @@ func TestHistoryAfterPrune(t *testing.T) { }() } } - t.Run("large_values", func(t *testing.T) { - db, h := testDbAndHistory(t, true, logger) - test(t, h, db) - }) - t.Run("small_values", func(t *testing.T) { - db, h := 
testDbAndHistory(t, false, logger) - test(t, h, db) - }) + + db, h := testDbAndHistory(t, logger) + test(t, h, db) } func TestHistoryRangeWithPrune(t *testing.T) { @@ -393,7 +376,7 @@ func TestHistoryRangeWithPrune(t *testing.T) { defer logEvery.Stop() ctx := context.Background() - db, h, _ := filledHistory(t, true, logger) + db, h, _ := filledHistory(t, logger) collateAndMergeHistory(t, db, h, 32, true) roTx, err := db.BeginRo(ctx) @@ -414,7 +397,7 @@ func TestHistoryRangeWithPrune(t *testing.T) { vals = append(vals, fmt.Sprintf("%x", v)) } - db2, h2, _ := filledHistory(t, true, logger) + db2, h2, _ := filledHistory(t, logger) collateAndMergeHistory(t, db2, h2, 32, false) roTx2, err := db2.BeginRo(ctx) @@ -447,7 +430,7 @@ func TestHistoryAsOfWithPrune(t *testing.T) { defer logEvery.Stop() ctx := context.Background() - db, h, _ := filledHistory(t, true, logger) + db, h, _ := filledHistory(t, logger) collateAndMergeHistory(t, db, h, 200, false) roTx, err := db.BeginRo(ctx) @@ -470,7 +453,7 @@ func TestHistoryAsOfWithPrune(t *testing.T) { vals = append(vals, fmt.Sprintf("%x", v)) } - db2, h2, _ := filledHistory(t, true, logger) + db2, h2, _ := filledHistory(t, logger) collateAndMergeHistory(t, db2, h2, 200, true) roTx2, err := db2.BeginRo(ctx) @@ -547,7 +530,7 @@ func TestHistoryCanPrune(t *testing.T) { if !testing.Short() { t.Run("withFiles", func(t *testing.T) { - db, h := testDbAndHistory(t, true, logger) + db, h := testDbAndHistory(t, logger) h.SnapshotsDisabled = false defer db.Close() @@ -584,7 +567,7 @@ func TestHistoryCanPrune(t *testing.T) { } t.Run("withoutFiles", func(t *testing.T) { - db, h := testDbAndHistory(t, false, logger) + db, h := testDbAndHistory(t, logger) h.SnapshotsDisabled = true h.KeepRecentTxnInDB = stepKeepInDB * h.stepSize @@ -629,8 +612,8 @@ func TestHistoryPruneCorrectnessWithFiles(t *testing.T) { setup := func(t *testing.T) (*History, kv.RwTx, *time.Ticker) { t.Helper() values := generateTestData(t, length.Addr, length.Addr, 1000, 1000, 1) - db, h := filledHistoryValues(t, true, values, log.New()) // registers Cleanup for db and h - h.KeepRecentTxnInDB = 900 // should be ignored since files are built + db, h := filledHistoryValues(t, values, log.New()) // registers Cleanup for db and h + h.KeepRecentTxnInDB = 900 // should be ignored since files are built t.Logf("step=%d\n", h.stepSize) collateAndMergeHistory(t, db, h, 500, false) @@ -647,12 +630,13 @@ func TestHistoryPruneCorrectnessWithFiles(t *testing.T) { assertResults := func(t *testing.T, h *History, rwTx kv.RwTx, hc *HistoryRoTx) { t.Helper() - icc, err := rwTx.CursorDupSort(h.ValuesTable) + // DataTable: non-DupSort, key = txNum(8)+addr. First() returns entry with smallest txNum. 
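+		// After pruning [0, nonPruned), the smallest remaining txNum must equal nonPruned.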
+ dc, err := rwTx.Cursor(h.KeysTable) require.NoError(t, err) - defer icc.Close() - k, _, err := icc.First() + defer dc.Close() + k, _, err := dc.First() require.NoError(t, err) - require.EqualValues(t, nonPruned, binary.BigEndian.Uint64(k[len(k)-8:])) + require.EqualValues(t, nonPruned, binary.BigEndian.Uint64(k[:8])) itable, err := rwTx.CursorDupSort(hc.iit.ii.ValuesTable) require.NoError(t, err) @@ -744,7 +728,7 @@ func TestHistoryPruneCorrectness(t *testing.T) { setup := func(t *testing.T) (*History, kv.RwTx, *time.Ticker) { t.Helper() values := generateTestData(t, length.Addr, length.Addr, 1000, 1000, 1) - db, h := filledHistoryValues(t, true, values, log.New()) // registers Cleanup for db and h + db, h := filledHistoryValues(t, values, log.New()) // registers Cleanup for db and h logEvery := time.NewTicker(30 * time.Second) t.Cleanup(logEvery.Stop) @@ -753,18 +737,18 @@ func TestHistoryPruneCorrectness(t *testing.T) { require.NoError(t, err) t.Cleanup(rwTx.Rollback) - var from, to [8]byte - binary.BigEndian.PutUint64(from[:], 0) + var to [8]byte binary.BigEndian.PutUint64(to[:], uint64(pruneIters)*pruneLimit) - icc, err := rwTx.CursorDupSort(h.ValuesTable) + // DataTable: non-DupSort, key = txNum(8)+addr, value = prevVal. Sorted txNum-first. + dc, err := rwTx.Cursor(h.KeysTable) require.NoError(t, err) - defer icc.Close() + defer dc.Close() count := 0 - for key, _, err := icc.Seek(from[:]); key != nil; key, _, err = icc.Next() { + for k, _, err := dc.First(); k != nil; k, _, err = dc.Next() { require.NoError(t, err) - if bytes.Compare(key[len(key)-8:], to[:]) >= 0 { + if bytes.Compare(k[:8], to[:]) >= 0 { break } count++ @@ -799,13 +783,13 @@ func TestHistoryPruneCorrectness(t *testing.T) { t.Logf("[%d] stats: %v", i, stat) } - icc, err := rwTx.CursorDupSort(h.ValuesTable) + dc2, err := rwTx.Cursor(h.KeysTable) require.NoError(t, err) - defer icc.Close() - key, _, err := icc.First() + defer dc2.Close() + key, _, err := dc2.First() require.NoError(t, err) require.NotNil(t, key) - require.EqualValues(t, pruneIters*int(pruneLimit), binary.BigEndian.Uint64(key[len(key)-8:])-1) + require.EqualValues(t, pruneIters*int(pruneLimit), binary.BigEndian.Uint64(key[:8])-1) }) t.Run("scan_prune", func(t *testing.T) { @@ -833,17 +817,17 @@ func TestHistoryPruneCorrectness(t *testing.T) { // require.NoError(t, err) // t.Logf("[%d] stats: %v", i, stat) //} - //icc, err := rwTx.CursorDupSort(h.ValuesTable) + //dc2, err := rwTx.Cursor(h.KeysTable) //require.NoError(t, err) - //defer icc.Close() - //key, _, err := icc.First() + //defer dc2.Close() + //key, _, err := dc2.First() //require.NoError(t, err) //require.NotNil(t, key) - //require.EqualValues(t, pruneIters*int(pruneLimit), binary.BigEndian.Uint64(key[len(key)-8:])-1) + //require.EqualValues(t, pruneIters*int(pruneLimit), binary.BigEndian.Uint64(key[:8])-1) }) } -func filledHistoryValues(tb testing.TB, largeValues bool, values map[string][]upd, logger log.Logger) (kv.RwDB, *History) { +func filledHistoryValues(tb testing.TB, values map[string][]upd, logger log.Logger) (kv.RwDB, *History) { tb.Helper() for key, upds := range values { @@ -852,7 +836,7 @@ func filledHistoryValues(tb testing.TB, largeValues bool, values map[string][]up } // history closed inside tb.Cleanup - db, h := testDbAndHistory(tb, largeValues, logger) + db, h := testDbAndHistory(tb, logger) tb.Cleanup(db.Close) tb.Cleanup(h.Close) @@ -897,9 +881,9 @@ func filledHistoryValues(tb testing.TB, largeValues bool, values map[string][]up return db, h } -func filledHistory(tb 
testing.TB, largeValues bool, logger log.Logger) (kv.RwDB, *History, uint64) { +func filledHistory(tb testing.TB, logger log.Logger) (kv.RwDB, *History, uint64) { tb.Helper() - db, h := testDbAndHistory(tb, largeValues, logger) + db, h := testDbAndHistory(tb, logger) ctx := context.Background() tx, err := db.BeginRw(ctx) require.NoError(tb, err) @@ -992,14 +976,9 @@ func TestHistoryHistory(t *testing.T) { collateAndMergeHistory(t, db, h, txs, true) checkHistoryHistory(t, h, txs) } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - }) + + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) } @@ -1085,14 +1064,8 @@ func TestHistoryMergeFiles(t *testing.T) { checkHistoryHistory(t, h, txs) } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - }) + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) } func TestHistoryScanFiles(t *testing.T) { @@ -1120,16 +1093,9 @@ func TestHistoryScanFiles(t *testing.T) { checkHistoryHistory(t, h, txs) } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - db.Close() - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - db.Close() - }) + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) + db.Close() } func TestHistoryRange1(t *testing.T) { @@ -1283,14 +1249,9 @@ func TestHistoryRange1(t *testing.T) { require.Equal([]string{"ff000000000003cf", "ff000000000001e7"}, vals) } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - }) + + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) } func TestHistoryRange2(t *testing.T) { @@ -1508,14 +1469,9 @@ func TestHistoryRange2(t *testing.T) { require.Equal(hexutil.MustDecodeHex("ff000000000003e7"), v) }) } - t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) - test(t, h, db, txs) - }) - t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) - test(t, h, db, txs) - }) + + db, h, txs := filledHistory(t, logger) + test(t, h, db, txs) } func TestScanStaticFilesH(t *testing.T) { @@ -1544,9 +1500,9 @@ func TestScanStaticFilesH(t *testing.T) { require.Equal(t, 0, len(h._visible.files)) } -func writeSomeHistory(tb testing.TB, largeValues bool, logger log.Logger) (kv.RwDB, *History, [][]byte, uint64) { +func writeSomeHistory(tb testing.TB, logger log.Logger) (kv.RwDB, *History, [][]byte, uint64) { tb.Helper() - db, h := testDbAndHistory(tb, largeValues, logger) + db, h := testDbAndHistory(tb, logger) ctx := context.Background() tx, err := db.BeginRw(ctx) require.NoError(tb, err) @@ -1654,16 +1610,8 @@ func Test_HistoryIterate_VariousKeysLen(t *testing.T) { require.Equal(fmt.Sprintf("%#x", writtenKeys), fmt.Sprintf("%#x", keys)) } - //LargeHistoryValues: don't support various keys len - //TODO: write hist test for non-various keys len - //t.Run("large_values", func(t *testing.T) { - // db, h, keys, txs := writeSomeHistory(t, true, logger) - // 
test(t, h, db, keys, txs) - //}) - t.Run("small_values", func(t *testing.T) { - db, h, keys, txs := writeSomeHistory(t, false, logger) - test(t, h, db, keys, txs) - }) + db, h, keys, txs := writeSomeHistory(t, logger) + test(t, h, db, keys, txs) } @@ -1675,7 +1623,7 @@ func TestHistory_OpenFolder(t *testing.T) { t.Parallel() logger := log.New() - db, h, txs := filledHistory(t, true, logger) + db, h, txs := filledHistory(t, logger) collateAndMergeHistory(t, db, h, txs, true) list := h._visibleFiles diff --git a/db/state/statecfg/state_schema.go b/db/state/statecfg/state_schema.go index 99f2dc0f71d..5eaa73e7b59 100644 --- a/db/state/statecfg/state_schema.go +++ b/db/state/statecfg/state_schema.go @@ -197,15 +197,14 @@ var Schema = SchemaGen{ Accessors: AccessorBTree | AccessorExistence, Hist: HistCfg{ - ValuesTable: kv.TblAccountHistoryVals, + ValuesTable: kv.TblAccountHistoryInvIdx, CompressorCfg: seg.DefaultCfg.WithValuesOnCompressedPage(64), Compression: seg.CompressNone, Accessors: AccessorHashMap, - HistoryLargeValues: false, - HistoryIdx: kv.AccountsHistoryIdx, + HistoryIdx: kv.AccountsHistoryIdx, IiCfg: InvIdxCfg{ - FilenameBase: kv.AccountsDomain.String(), KeysTable: kv.TblAccountHistoryKeys, ValuesTable: kv.TblAccountIdx, + FilenameBase: kv.AccountsDomain.String(), KeysTable: kv.TblAccountHistoryData, ValuesTable: kv.TblAccountIdx, CompressorCfg: seg.DefaultCfg, Accessors: AccessorHashMap, }, @@ -218,15 +217,14 @@ var Schema = SchemaGen{ Accessors: AccessorBTree | AccessorExistence, Hist: HistCfg{ - ValuesTable: kv.TblStorageHistoryVals, + ValuesTable: kv.TblStorageHistoryInvIdx, CompressorCfg: seg.DefaultCfg, Compression: seg.CompressNone, Accessors: AccessorHashMap, - HistoryLargeValues: false, - HistoryIdx: kv.StorageHistoryIdx, + HistoryIdx: kv.StorageHistoryIdx, IiCfg: InvIdxCfg{ - FilenameBase: kv.StorageDomain.String(), KeysTable: kv.TblStorageHistoryKeys, ValuesTable: kv.TblStorageIdx, + FilenameBase: kv.StorageDomain.String(), KeysTable: kv.TblStorageHistoryData, ValuesTable: kv.TblStorageIdx, CompressorCfg: seg.DefaultCfg, Accessors: AccessorHashMap, }, @@ -240,15 +238,14 @@ var Schema = SchemaGen{ LargeValues: true, Hist: HistCfg{ - ValuesTable: kv.TblCodeHistoryVals, + ValuesTable: kv.TblCodeHistoryInvIdx, CompressorCfg: seg.DefaultCfg, Compression: seg.CompressKeys | seg.CompressVals, Accessors: AccessorHashMap, - HistoryLargeValues: true, - HistoryIdx: kv.CodeHistoryIdx, + HistoryIdx: kv.CodeHistoryIdx, IiCfg: InvIdxCfg{ - FilenameBase: kv.CodeDomain.String(), KeysTable: kv.TblCodeHistoryKeys, ValuesTable: kv.TblCodeIdx, + FilenameBase: kv.CodeDomain.String(), KeysTable: kv.TblCodeHistoryData, ValuesTable: kv.TblCodeIdx, CompressorCfg: seg.DefaultCfg, Accessors: AccessorHashMap, }, @@ -262,19 +259,18 @@ var Schema = SchemaGen{ ReplaceKeysInValues: AggregatorSqueezeCommitmentValues, // when true, keys are replaced in values during merge once file range reaches threshold Hist: HistCfg{ - ValuesTable: kv.TblCommitmentHistoryVals, + ValuesTable: kv.TblCommitmentHistoryInvIdx, CompressorCfg: HistoryCompressCfg.WithValuesOnCompressedPage(64), Compression: seg.CompressNone, // seg.CompressKeys | seg.CompressVals, HistoryIdx: kv.CommitmentHistoryIdx, Accessors: AccessorHashMap, - HistoryLargeValues: false, HistoryValuesOnCompressedPage: 64, SnapshotsDisabled: true, HistoryDisabled: true, IiCfg: InvIdxCfg{ - FilenameBase: kv.CommitmentDomain.String(), KeysTable: kv.TblCommitmentHistoryKeys, ValuesTable: kv.TblCommitmentIdx, + FilenameBase: kv.CommitmentDomain.String(), 
KeysTable: kv.TblCommitmentHistoryData, ValuesTable: kv.TblCommitmentIdx, CompressorCfg: seg.DefaultCfg, Accessors: AccessorHashMap, }, @@ -288,15 +284,14 @@ var Schema = SchemaGen{ Accessors: AccessorBTree | AccessorExistence, Hist: HistCfg{ - ValuesTable: kv.TblReceiptHistoryVals, + ValuesTable: kv.TblReceiptHistoryInvIdx, CompressorCfg: seg.DefaultCfg, Compression: seg.CompressNone, Accessors: AccessorHashMap, - HistoryLargeValues: false, - HistoryIdx: kv.ReceiptHistoryIdx, + HistoryIdx: kv.ReceiptHistoryIdx, IiCfg: InvIdxCfg{ - FilenameBase: kv.ReceiptDomain.String(), KeysTable: kv.TblReceiptHistoryKeys, ValuesTable: kv.TblReceiptIdx, + FilenameBase: kv.ReceiptDomain.String(), KeysTable: kv.TblReceiptHistoryData, ValuesTable: kv.TblReceiptIdx, CompressorCfg: seg.DefaultCfg, Accessors: AccessorHashMap, }, @@ -310,19 +305,18 @@ var Schema = SchemaGen{ CompressCfg: DomainCompressCfg, Compression: seg.CompressNone, //seg.CompressKeys | seg.CompressVals, Hist: HistCfg{ - ValuesTable: kv.TblRCacheHistoryVals, + ValuesTable: kv.TblRCacheHistoryInvIdx, CompressorCfg: seg.Cfg{ValuesOnCompressedPage: 16}, Compression: seg.CompressNone, //seg.CompressKeys | seg.CompressVals, Accessors: AccessorHashMap, - HistoryLargeValues: true, - HistoryIdx: kv.RCacheHistoryIdx, + HistoryIdx: kv.RCacheHistoryIdx, SnapshotsDisabled: true, HistoryValuesOnCompressedPage: 16, IiCfg: InvIdxCfg{ Disable: true, // disable everything by default - FilenameBase: kv.RCacheDomain.String(), KeysTable: kv.TblRCacheHistoryKeys, ValuesTable: kv.TblRCacheIdx, + FilenameBase: kv.RCacheDomain.String(), KeysTable: kv.TblRCacheHistoryData, ValuesTable: kv.TblRCacheIdx, CompressorCfg: seg.DefaultCfg, Accessors: AccessorHashMap, }, diff --git a/db/state/statecfg/statecfg.go b/db/state/statecfg/statecfg.go index 3924b22cb71..910066a04eb 100644 --- a/db/state/statecfg/statecfg.go +++ b/db/state/statecfg/statecfg.go @@ -38,24 +38,14 @@ func (d DomainCfg) GetVersions() VersionTypes { type HistCfg struct { IiCfg InvIdxCfg - ValuesTable string // bucket for history values; key1+key2+txnNum -> oldValue , stores values BEFORE change + // ValuesTable is the InvIndexTable (DupSort): key → txNum + // The DataTable (non-DupSort, txNum+key → prev_value) is IiCfg.KeysTable + ValuesTable string // bucket for history inverted index; key -> txNum (DupSort, no embedded value) KeepRecentTxnInDB uint64 // When snapshotsDisabled=true, keepRecentTxnInDB is used to keep this amount of txn in db before pruning - // historyLargeValues: used to store values > 2kb (pageSize/2) - // small values - can be stored in more compact ways in db (DupSort feature) - // can't use DupSort optimization (aka. prefix-compression) if values size > 4kb - - // historyLargeValues=true - doesn't support keys of various length (all keys must have same length) - // not large: - // keys: txNum -> key1+key2 - // vals: key1+key2 -> txNum + value (DupSort) - // large: - // keys: txNum -> key1+key2 - // vals: key1+key2+txNum -> value (not DupSort) - HistoryLargeValues bool - SnapshotsDisabled bool // don't produce .v and .ef files, keep in db table. old data will be pruned anyway. - HistoryDisabled bool // skip all write operations to this History (even in DB) + SnapshotsDisabled bool // don't produce .v and .ef files, keep in db table. old data will be pruned anyway. 
+ HistoryDisabled bool // skip all write operations to this History (even in DB) HistoryValuesOnCompressedPage int // deprecated, it is only for ver.0 snapshots backward compatibility From 30c1b71213dd98c72aa4773ab8fefc048440959c Mon Sep 17 00:00:00 2001 From: Alexey Sharov Date: Wed, 4 Mar 2026 14:54:53 +0700 Subject: [PATCH 2/4] save --- db/state/history.go | 1 + 1 file changed, 1 insertion(+) diff --git a/db/state/history.go b/db/state/history.go index f80c132a17d..e9c0169c6f9 100644 --- a/db/state/history.go +++ b/db/state/history.go @@ -1122,6 +1122,7 @@ func (ht *HistoryRoTx) pruneHistoryTables(ctx context.Context, rwTx kv.RwTx, txF } stat.PruneCountTx = txCount + stat.Progress = prune.Done if stat.MinTxNum == math.MaxUint64 { stat.MinTxNum = 0 } From 6dad7511657fcb96a38f904cfadbf7ed33aa98f1 Mon Sep 17 00:00:00 2001 From: Alexey Sharov Date: Tue, 10 Mar 2026 12:35:36 +0700 Subject: [PATCH 3/4] save --- db/state/history_test.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/db/state/history_test.go b/db/state/history_test.go index d1cc6796b26..c1923a036f5 100644 --- a/db/state/history_test.go +++ b/db/state/history_test.go @@ -1724,11 +1724,11 @@ func TestHistoryRange_DBOnly(t *testing.T) { } t.Run("large_values", func(t *testing.T) { - db, h, _ := filledHistory(t, true, logger) + db, h, _ := filledHistory(t, logger) test(t, h, db) }) t.Run("small_values", func(t *testing.T) { - db, h, _ := filledHistory(t, false, logger) + db, h, _ := filledHistory(t, logger) test(t, h, db) }) } @@ -1816,11 +1816,11 @@ func TestRangeAsOf_ValuesMatchHistorySeek(t *testing.T) { } t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) + db, h, txs := filledHistory(t, logger) test(t, h, db, txs) }) t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) + db, h, txs := filledHistory(t, logger) test(t, h, db, txs) }) } @@ -1871,11 +1871,11 @@ func TestHistoryRange_EmptyRange(t *testing.T) { } t.Run("large_values", func(t *testing.T) { - db, h, txs := filledHistory(t, true, logger) + db, h, txs := filledHistory(t, logger) test(t, h, db, txs) }) t.Run("small_values", func(t *testing.T) { - db, h, txs := filledHistory(t, false, logger) + db, h, txs := filledHistory(t, logger) test(t, h, db, txs) }) } @@ -1886,7 +1886,7 @@ func BenchmarkHistoryRange(b *testing.B) { logger := log.New() ctx := context.Background() - db, h, txs := filledHistory(b, true, logger) + db, h, txs := filledHistory(b, logger) collateAndMergeHistory(b, db, h, txs, true) tx, err := db.BeginRo(ctx) @@ -1915,7 +1915,7 @@ func BenchmarkRangeAsOf(b *testing.B) { logger := log.New() ctx := context.Background() - db, h, txs := filledHistory(b, true, logger) + db, h, txs := filledHistory(b, logger) collateAndMergeHistory(b, db, h, txs, true) tx, err := db.BeginRo(ctx) @@ -1960,7 +1960,7 @@ func BenchmarkHistoryRange_MultiFile(b *testing.B) { logger := log.New() ctx := context.Background() - db, h, txs := filledHistory(b, true, logger) + db, h, txs := filledHistory(b, logger) collateHistory(b, db, h, txs) tx, err := db.BeginRo(ctx) @@ -1989,7 +1989,7 @@ func BenchmarkRangeAsOf_MultiFile(b *testing.B) { logger := log.New() ctx := context.Background() - db, h, txs := filledHistory(b, true, logger) + db, h, txs := filledHistory(b, logger) collateHistory(b, db, h, txs) tx, err := db.BeginRo(ctx) From a0c5193dee5312533374c90399004bb70ca614dd Mon Sep 17 00:00:00 2001 From: Alexey Sharov Date: Tue, 10 Mar 2026 12:48:32 +0700 Subject: [PATCH 
4/4] save
---
 db/migrations/history_table_format_change.go | 48 ++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 db/migrations/history_table_format_change.go

diff --git a/db/migrations/history_table_format_change.go b/db/migrations/history_table_format_change.go
new file mode 100644
index 00000000000..bbfd9780eef
--- /dev/null
+++ b/db/migrations/history_table_format_change.go
@@ -0,0 +1,48 @@
+// Copyright 2024 The Erigon Authors
+// This file is part of Erigon.
+//
+// Erigon is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Erigon is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with Erigon. If not, see <http://www.gnu.org/licenses/>.
+
+package migrations
+
+import (
+	"context"
+
+	"github.com/erigontech/erigon/common/dir"
+	"github.com/erigontech/erigon/common/log/v3"
+	"github.com/erigontech/erigon/db/datadir"
+	"github.com/erigontech/erigon/db/kv"
+)
+
+// HistoryTableFormatChange removes chaindata so it gets rebuilt with the new history table format:
+// - DataTable (formerly HistoryKeys): txNum+key → prev_value (non-DupSort, sequential writes)
+// - InvIndexTable (formerly HistoryVals): key → txNum (DupSort, no embedded value)
+var HistoryTableFormatChange = Migration{
+	Name: "history_table_format_change",
+	Up: func(db kv.RwDB, dirs datadir.Dirs, progress []byte, BeforeCommit Callback, logger log.Logger) (err error) {
+		tx, err := db.BeginRw(context.Background())
+		if err != nil {
+			return err
+		}
+		defer tx.Rollback()
+
+		if err := BeforeCommit(tx, nil, true); err != nil {
+			return err
+		}
+		if err := dir.RemoveAll(dirs.Chaindata); err != nil {
+			return err
+		}
+		return nil
+	},
+}
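
A minimal sketch of the write path implied by the new two-table layout, for readers following the schema change. It assumes a kv.RwTx and reuses the Account table constants from patch 1; putHistoryStep is a hypothetical illustration only, and the actual write path lives in db/state/history.go and may differ:

    package example

    import (
        "encoding/binary"

        "github.com/erigontech/erigon/db/kv"
    )

    // putHistoryStep records that `key` held `prevVal` before the change at `txNum`:
    //   DataTable:     txNum(8, big-endian)+key -> prevVal  (non-DupSort; txNum-first keys make
    //                  writes near-sequential and pruning a prefix scan)
    //   InvIndexTable: key -> txNum(8, big-endian)          (DupSort; txNum is the dup value)
    func putHistoryStep(tx kv.RwTx, txNum uint64, key, prevVal []byte) error {
        var txn [8]byte
        binary.BigEndian.PutUint64(txn[:], txNum)

        dataKey := make([]byte, 0, 8+len(key))
        dataKey = append(append(dataKey, txn[:]...), key...)
        if err := tx.Put(kv.TblAccountHistoryData, dataKey, prevVal); err != nil {
            return err
        }
        return tx.Put(kv.TblAccountHistoryInvIdx, key, txn[:])
    }

Under this layout, pruning up to a txNum reduces to deleting the DataTable prefix below it plus the matching dup entries in the InvIndexTable, which matches the k[:8] txNum assertions in the prune tests above.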