From 066e74e955cadc14b39d428ce4cf4992dd75d9b0 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 12 Dec 2025 11:53:15 -0500 Subject: [PATCH 01/72] added initial backfill parameter/function scaffolding --- cmd/stellar-rpc/internal/config/main.go | 1 + cmd/stellar-rpc/internal/config/options.go | 45 ++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/cmd/stellar-rpc/internal/config/main.go b/cmd/stellar-rpc/internal/config/main.go index a52bafa4..8d5af317 100644 --- a/cmd/stellar-rpc/internal/config/main.go +++ b/cmd/stellar-rpc/internal/config/main.go @@ -17,6 +17,7 @@ type Config struct { Strict bool StellarCoreURL string + Backfill int32 CaptiveCoreStoragePath string StellarCoreBinaryPath string CaptiveCoreConfigPath string diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index cb6c0a92..bbfd307f 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -82,6 +82,26 @@ func (cfg *Config) options() Options { return nil }, }, + { + Name: "backfill", + Usage: "backfill database with `n` ledgers synchronously on startup", + ConfigKey: &cfg.Backfill, + DefaultValue: 0, + Validate: func(_ *Option) error { + // As above, we need to do this after the config is parsed. + // Hence we use a validator to run all backfilling after parsing. + + // Check if we need to backfill at all + + // ASK ABOUT: do we enforce a max backfill value? + if cfg.Backfill <= 0 { + return nil + } + err := runBackfill(cfg.Backfill, cfg.SQLiteDBPath) + + return nil + }, + }, { Name: "stellar-core-timeout", Usage: "Timeout used when submitting requests to stellar-core", @@ -747,3 +767,28 @@ type networkConfig struct { historyArchiveURLs []string networkPassphrase string } + +func runBackfill(n_backfill int, sqliteDBPath string) error { + logrus.Infof("Starting backfill of %d ledgers into the database at %s", n_backfill, sqliteDBPath) + var threshold time.Duration = 3 * time.Second + // startLedgerNum := getLatestLedgerNumInGCS() - n_backfill + for { + // 1.) Read data from CDP + + // 2.) Write to DB + + // 3.) 
Endian stuff + + // We read backfill ledgers starting from startledgerNum to GetLatestLedgerNumInGCS + // if getLatestLedgerNumInGCS() == startLedgerNum { + // Even if we've caught up, we need to make sure the latest ledger is not + // going to advance before the initial history window begins moving forward + // if latestLedgerInGCSAge < threshold { + // break + // } else { + // sleep(time_for_latest_ledger_in_gcs_to_advance - threshold) + // } + } + + return nil +} From c7a19e9627dc354498f7a610489a696e7c50ec09 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Mon, 15 Dec 2025 18:16:43 -0500 Subject: [PATCH 02/72] added scaffolding for ledger ingestion backfill --- cmd/stellar-rpc/internal/config/options.go | 107 ++++++++++++++++----- cmd/stellar-rpc/internal/daemon/daemon.go | 13 +++ 2 files changed, 94 insertions(+), 26 deletions(-) diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index bbfd307f..8b58af17 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -2,6 +2,7 @@ package config import ( + "context" _ "embed" "errors" "fmt" @@ -18,7 +19,10 @@ import ( "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/network" "github.com/stellar/go-stellar-sdk/support/datastore" + supportlog "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/go-stellar-sdk/support/strutils" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/db" ) const ( @@ -88,17 +92,10 @@ func (cfg *Config) options() Options { ConfigKey: &cfg.Backfill, DefaultValue: 0, Validate: func(_ *Option) error { - // As above, we need to do this after the config is parsed. - // Hence we use a validator to run all backfilling after parsing. - - // Check if we need to backfill at all - - // ASK ABOUT: do we enforce a max backfill value? 
- if cfg.Backfill <= 0 { - return nil + // Ensure config is valid for backfill + if cfg.Backfill > 0 && !cfg.ServeLedgersFromDatastore { + return errors.New("backfill requires serving ledgers from datastore to be enabled") } - err := runBackfill(cfg.Backfill, cfg.SQLiteDBPath) - return nil }, }, @@ -768,27 +765,85 @@ type networkConfig struct { networkPassphrase string } -func runBackfill(n_backfill int, sqliteDBPath string) error { - logrus.Infof("Starting backfill of %d ledgers into the database at %s", n_backfill, sqliteDBPath) - var threshold time.Duration = 3 * time.Second +// This function backfills the local database with n ledgers from the datastore +// It is called by daemon.go if cfg.Backfill > 0 +func RunBackfill(cfg *Config, logger *supportlog.Entry, dbConn *db.DB, dsInfo DatastoreInfo) error { + var ( + n_backfill int32 = cfg.Backfill + chunk_size int32 = 6400 // number of ledgers to process in one batch + DBPath string = cfg.SQLiteDBPath + ) + ctx := context.Background() + + logger.Infof("Creating BufferedStorageBackend") + backend, err := makeBackend(dsInfo) + if err != nil { + return fmt.Errorf("could not create storage backend: %w", err) + } + defer backend.Close() + + logger.Infof("Starting backfill precheck for inserting %d ledgers into the database at %s", n_backfill, DBPath) + writtenWindows, err := runBackfillPrecheck(ctx, dbConn) + if err != nil { + return fmt.Errorf("backfill precheck failed: %w", err) + } + if len(writtenWindows) > 0 { + logger.Infof("Backfill precheck found %d already written ledger windows, skipping them", len(writtenWindows)) + } + + logger.Infof("Starting backfill of %d ledgers into the database at %s", n_backfill, DBPath) + // startLedgerNum := getLatestLedgerNumInGCS() - n_backfill for { - // 1.) Read data from CDP + // for i, chunk in chunks + // fetch ledgers in chunk + // write ledgers to DB + // if i % COMMIT_EVERY_N_CHUNKS == 0: + // commit transaction + break + } + + return nil +} - // 2.) Write to DB +func makeBackend(dsInfo DatastoreInfo) (*ledgerbackend.BufferedStorageBackend, error) { + backend, err := ledgerbackend.NewBufferedStorageBackend( + ledgerbackend.BufferedStorageBackendConfig{ + BufferSize: 512, + NumWorkers: 5, + RetryLimit: 3, + RetryWait: 5 * time.Second, + }, + dsInfo.Ds, + dsInfo.Schema, + ) + if err != nil { + return nil, err + } + return backend, nil +} - // 3.) 
Endian stuff +func runBackfillPrecheck(callerCtx context.Context, dbConn *db.DB) ([][]int, error) { + ctx, cancel := context.WithTimeout(callerCtx, 5*time.Second) + defer cancel() - // We read backfill ledgers starting from startledgerNum to GetLatestLedgerNumInGCS - // if getLatestLedgerNumInGCS() == startLedgerNum { - // Even if we've caught up, we need to make sure the latest ledger is not - // going to advance before the initial history window begins moving forward - // if latestLedgerInGCSAge < threshold { - // break - // } else { - // sleep(time_for_latest_ledger_in_gcs_to_advance - threshold) - // } + var windows [][]int + lastWrittenSeq, err := db.NewLedgerReader(dbConn).GetLatestLedgerSequence(ctx) + if err == db.ErrEmptyDB { + return windows, nil + } else if err != nil { + return nil, fmt.Errorf("could not get latest ledger sequence from DB: %w", err) } - return nil + return windows, nil +} + +func getLatestLedgerNumInCDP() int32 { + return 0 +} + +type DatastoreInfo struct { + Ds datastore.DataStore + Schema datastore.DataStoreSchema + Config datastore.DataStoreConfig } diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index ae3bdd80..f6d3d0f4 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -193,6 +193,19 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { if cfg.ServeLedgersFromDatastore { daemon.dataStore, daemon.dataStoreSchema = mustCreateDataStore(cfg, logger) } + if cfg.Backfill > 0 { + err := config.RunBackfill(cfg, + logger, + daemon.db, + config.DatastoreInfo{ + Ds: daemon.dataStore, + Schema: daemon.dataStoreSchema, + Config: cfg.DataStoreConfig, + }) + if err != nil { + logger.WithError(err).Fatal("failed to backfill ledgers") + } + } daemon.ingestService = createIngestService(cfg, logger, daemon, feewindows, historyArchive) daemon.preflightWorkerPool = createPreflightWorkerPool(cfg, logger, daemon) daemon.jsonRPCHandler = createJSONRPCHandler(cfg, logger, daemon, feewindows) From e7a1b9341501c3658e380a4f8898659d76db8055 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Mon, 15 Dec 2025 19:24:02 -0500 Subject: [PATCH 03/72] expanded relevant helper functions --- cmd/stellar-rpc/internal/config/main.go | 2 +- cmd/stellar-rpc/internal/config/options.go | 51 ++++++++++++++++------ cmd/stellar-rpc/internal/daemon/daemon.go | 5 +++ 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/cmd/stellar-rpc/internal/config/main.go b/cmd/stellar-rpc/internal/config/main.go index 8d5af317..1827258b 100644 --- a/cmd/stellar-rpc/internal/config/main.go +++ b/cmd/stellar-rpc/internal/config/main.go @@ -17,7 +17,7 @@ type Config struct { Strict bool StellarCoreURL string - Backfill int32 + Backfill uint32 CaptiveCoreStoragePath string StellarCoreBinaryPath string CaptiveCoreConfigPath string diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index 8b58af17..3d684384 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -769,8 +769,8 @@ type networkConfig struct { // It is called by daemon.go if cfg.Backfill > 0 func RunBackfill(cfg *Config, logger *supportlog.Entry, dbConn *db.DB, dsInfo DatastoreInfo) error { var ( - n_backfill int32 = cfg.Backfill - chunk_size int32 = 6400 // number of ledgers to process in one batch + n_backfill uint32 = cfg.Backfill + chunk_size uint32 = 6400 // number of ledgers to process in one batch DBPath string = 
cfg.SQLiteDBPath ) ctx := context.Background() @@ -793,7 +793,12 @@ func RunBackfill(cfg *Config, logger *supportlog.Entry, dbConn *db.DB, dsInfo Da logger.Infof("Starting backfill of %d ledgers into the database at %s", n_backfill, DBPath) - // startLedgerNum := getLatestLedgerNumInGCS() - n_backfill + // Determine current tip of the datastore + startLedgerNum, err := getLatestLedgerNumInCDP(ctx, dsInfo.Ds) + if err != nil { + return err + } + for { // for i, chunk in chunks // fetch ledgers in chunk @@ -823,23 +828,43 @@ func makeBackend(dsInfo DatastoreInfo) (*ledgerbackend.BufferedStorageBackend, e return backend, nil } -func runBackfillPrecheck(callerCtx context.Context, dbConn *db.DB) ([][]int, error) { - ctx, cancel := context.WithTimeout(callerCtx, 5*time.Second) - defer cancel() +func runBackfillPrecheck(callerCtx context.Context, dbConn *db.DB) ([][]uint32, error) { + ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) + defer cancelRunBackfill() - var windows [][]int - lastWrittenSeq, err := db.NewLedgerReader(dbConn).GetLatestLedgerSequence(ctx) - if err == db.ErrEmptyDB { - return windows, nil - } else if err != nil { + var windows [][]uint32 + lastWrittenSeq, err := getLastestLedgerNumInSqliteDB(ctx, dbConn) + if err != nil { return nil, fmt.Errorf("could not get latest ledger sequence from DB: %w", err) } return windows, nil } -func getLatestLedgerNumInCDP() int32 { - return 0 +// Gets the latest ledger number stored in the local Sqlite DB +func getLastestLedgerNumInSqliteDB(callerCtx context.Context, dbConn *db.DB) (uint32, error) { + ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) + defer cancelRunBackfill() + + seq, err := db.NewLedgerReader(dbConn).GetLatestLedgerSequence(ctx) + if err == db.ErrEmptyDB { + return 0, nil + } else if err != nil { + return 0, fmt.Errorf("could not get latest ledger sequence from DB: %w", err) + } + return seq, nil +} + +// Gets the latest ledger number stored in the cloud Datastore/datalake +func getLatestLedgerNumInCDP(callerCtx context.Context, ds datastore.DataStore) (uint32, error) { + ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) + defer cancelRunBackfill() + + seq, err := datastore.FindLatestLedgerSequence(ctx, ds) + if err != nil { + return 0, fmt.Errorf("could not get latest ledger sequence from datastore: %w", err) + } + return seq, nil } type DatastoreInfo struct { diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index f6d3d0f4..1f84fc7a 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -194,6 +194,11 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { daemon.dataStore, daemon.dataStoreSchema = mustCreateDataStore(cfg, logger) } if cfg.Backfill > 0 { + if cfg.Backfill > cfg.HistoryRetentionWindow { + logger.Warnf("backfill value (%d) exceeds history-retention-window (%d), setting backfill to history-retention-window value", cfg.Backfill, cfg.HistoryRetentionWindow) + cfg.Backfill = cfg.HistoryRetentionWindow + } + err := config.RunBackfill(cfg, logger, daemon.db, From 10fccc0f43c6765df3eeb503ad90ac1ea2803c97 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 16 Dec 2025 17:00:06 -0500 Subject: [PATCH 04/72] refactored code, made CLI arg a bool instead of int --- cmd/stellar-rpc/internal/config/main.go | 2 +- cmd/stellar-rpc/internal/config/options.go | 116 +-------------------- cmd/stellar-rpc/internal/daemon/daemon.go | 11 +- 3 
files changed, 6 insertions(+), 123 deletions(-) diff --git a/cmd/stellar-rpc/internal/config/main.go b/cmd/stellar-rpc/internal/config/main.go index 1827258b..62fa00ef 100644 --- a/cmd/stellar-rpc/internal/config/main.go +++ b/cmd/stellar-rpc/internal/config/main.go @@ -17,7 +17,7 @@ type Config struct { Strict bool StellarCoreURL string - Backfill uint32 + Backfill bool CaptiveCoreStoragePath string StellarCoreBinaryPath string CaptiveCoreConfigPath string diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index 3d684384..2d8bb223 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -2,7 +2,6 @@ package config import ( - "context" _ "embed" "errors" "fmt" @@ -19,10 +18,7 @@ import ( "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/network" "github.com/stellar/go-stellar-sdk/support/datastore" - supportlog "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/go-stellar-sdk/support/strutils" - - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/db" ) const ( @@ -90,10 +86,10 @@ func (cfg *Config) options() Options { Name: "backfill", Usage: "backfill database with `n` ledgers synchronously on startup", ConfigKey: &cfg.Backfill, - DefaultValue: 0, + DefaultValue: false, Validate: func(_ *Option) error { // Ensure config is valid for backfill - if cfg.Backfill > 0 && !cfg.ServeLedgersFromDatastore { + if cfg.Backfill && !cfg.ServeLedgersFromDatastore { return errors.New("backfill requires serving ledgers from datastore to be enabled") } return nil @@ -764,111 +760,3 @@ type networkConfig struct { historyArchiveURLs []string networkPassphrase string } - -// This function backfills the local database with n ledgers from the datastore -// It is called by daemon.go if cfg.Backfill > 0 -func RunBackfill(cfg *Config, logger *supportlog.Entry, dbConn *db.DB, dsInfo DatastoreInfo) error { - var ( - n_backfill uint32 = cfg.Backfill - chunk_size uint32 = 6400 // number of ledgers to process in one batch - DBPath string = cfg.SQLiteDBPath - ) - ctx := context.Background() - - logger.Infof("Creating BufferedStorageBackend") - backend, err := makeBackend(dsInfo) - if err != nil { - return fmt.Errorf("could not create storage backend: %w", err) - } - defer backend.Close() - - logger.Infof("Starting backfill precheck for inserting %d ledgers into the database at %s", n_backfill, DBPath) - writtenWindows, err := runBackfillPrecheck(ctx, dbConn) - if err != nil { - return fmt.Errorf("backfill precheck failed: %w", err) - } - if len(writtenWindows) > 0 { - logger.Infof("Backfill precheck found %d already written ledger windows, skipping them", len(writtenWindows)) - } - - logger.Infof("Starting backfill of %d ledgers into the database at %s", n_backfill, DBPath) - - // Determine current tip of the datastore - startLedgerNum, err := getLatestLedgerNumInCDP(ctx, dsInfo.Ds) - if err != nil { - return err - } - - for { - // for i, chunk in chunks - // fetch ledgers in chunk - // write ledgers to DB - // if i % COMMIT_EVERY_N_CHUNKS == 0: - // commit transaction - break - } - - return nil -} - -func makeBackend(dsInfo DatastoreInfo) (*ledgerbackend.BufferedStorageBackend, error) { - backend, err := ledgerbackend.NewBufferedStorageBackend( - ledgerbackend.BufferedStorageBackendConfig{ - BufferSize: 512, - NumWorkers: 5, - RetryLimit: 3, - RetryWait: 5 * time.Second, - }, - dsInfo.Ds, - dsInfo.Schema, - ) - if err != nil { - return nil, err - } - 
return backend, nil -} - -func runBackfillPrecheck(callerCtx context.Context, dbConn *db.DB) ([][]uint32, error) { - ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) - defer cancelRunBackfill() - - var windows [][]uint32 - lastWrittenSeq, err := getLastestLedgerNumInSqliteDB(ctx, dbConn) - if err != nil { - return nil, fmt.Errorf("could not get latest ledger sequence from DB: %w", err) - } - - return windows, nil -} - -// Gets the latest ledger number stored in the local Sqlite DB -func getLastestLedgerNumInSqliteDB(callerCtx context.Context, dbConn *db.DB) (uint32, error) { - ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) - defer cancelRunBackfill() - - seq, err := db.NewLedgerReader(dbConn).GetLatestLedgerSequence(ctx) - if err == db.ErrEmptyDB { - return 0, nil - } else if err != nil { - return 0, fmt.Errorf("could not get latest ledger sequence from DB: %w", err) - } - return seq, nil -} - -// Gets the latest ledger number stored in the cloud Datastore/datalake -func getLatestLedgerNumInCDP(callerCtx context.Context, ds datastore.DataStore) (uint32, error) { - ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) - defer cancelRunBackfill() - - seq, err := datastore.FindLatestLedgerSequence(ctx, ds) - if err != nil { - return 0, fmt.Errorf("could not get latest ledger sequence from datastore: %w", err) - } - return seq, nil -} - -type DatastoreInfo struct { - Ds datastore.DataStore - Schema datastore.DataStoreSchema - Config datastore.DataStoreConfig -} diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index 1f84fc7a..c62c8646 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -193,16 +193,11 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { if cfg.ServeLedgersFromDatastore { daemon.dataStore, daemon.dataStoreSchema = mustCreateDataStore(cfg, logger) } - if cfg.Backfill > 0 { - if cfg.Backfill > cfg.HistoryRetentionWindow { - logger.Warnf("backfill value (%d) exceeds history-retention-window (%d), setting backfill to history-retention-window value", cfg.Backfill, cfg.HistoryRetentionWindow) - cfg.Backfill = cfg.HistoryRetentionWindow - } - - err := config.RunBackfill(cfg, + if cfg.Backfill { + err := ingest.RunBackfill(cfg, logger, daemon.db, - config.DatastoreInfo{ + ingest.DatastoreInfo{ Ds: daemon.dataStore, Schema: daemon.dataStoreSchema, Config: cfg.DataStoreConfig, From ce14b8154c6f01e1a0f02adb5ff102527f7cf7bd Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 16 Dec 2025 17:00:46 -0500 Subject: [PATCH 05/72] moved major legwork to ingest folder --- cmd/stellar-rpc/internal/ingest/backfill.go | 130 ++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 cmd/stellar-rpc/internal/ingest/backfill.go diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go new file mode 100644 index 00000000..5e514776 --- /dev/null +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -0,0 +1,130 @@ +package ingest + +import ( + "context" + "fmt" + "time" + + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + "github.com/stellar/go-stellar-sdk/support/datastore" + supportlog "github.com/stellar/go-stellar-sdk/support/log" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/config" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/db" +) + +const ( + // OneDayOfLedgers is (roughly) a 24 hour window of ledgers. 
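+ // (Roughly 86,400 seconds per day at Stellar's ~5-second ledger close time ≈ 17,280 ledgers.)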
+ OneDayOfLedgers = 17280 + SevenDayOfLedgers = OneDayOfLedgers * 7 +) + +// This function backfills the local database with n ledgers from the datastore +// It is called by daemon.go if cfg.Backfill is true +func RunBackfill(cfg *config.Config, logger *supportlog.Entry, dbConn *db.DB, dsInfo DatastoreInfo) error { + var ( + n_backfill uint32 = cfg.HistoryRetentionWindow + chunk_size uint32 = 6400 // number of ledgers to process in one batch + DBPath string = cfg.SQLiteDBPath + ) + ctx := context.Background() + + logger.Infof("Creating BufferedStorageBackend") + backend, err := makeBackend(dsInfo) + if err != nil { + return fmt.Errorf("could not create storage backend: %w", err) + } + defer backend.Close() + + logger.Infof("Starting backfill precheck for inserting %d ledgers into the database at %s", n_backfill, DBPath) + writtenWindows, err := runBackfillPrecheck(ctx, dbConn) + if err != nil { + return fmt.Errorf("backfill precheck failed: %w", err) + } + if len(writtenWindows) > 0 { + logger.Infof("Backfill precheck found %d already written ledger windows, skipping them", len(writtenWindows)) + } + + logger.Infof("Starting backfill of %d ledgers into the database at %s", n_backfill, DBPath) + + // Determine current tip of the datastore + startLedgerNum, err := getLatestLedgerNumInCDP(ctx, dsInfo.Ds) + if err != nil { + return err + } + + for { + // for i, chunk in chunks + // fetch ledgers in chunk + // write ledgers to DB + // if i % COMMIT_EVERY_N_CHUNKS == 0: + // commit transaction + break + } + + return nil +} + +func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { + backend, err := ledgerbackend.NewBufferedStorageBackend( + ledgerbackend.BufferedStorageBackendConfig{ + BufferSize: 1024, + NumWorkers: 1000, + RetryLimit: 3, + RetryWait: 5 * time.Second, + }, + dsInfo.Ds, + dsInfo.Schema, + ) + if err != nil { + return nil, err + } + return backend, nil +} + +// Checks to ensure state of local DB is acceptable for backfilling +// If so, +func runBackfillPrecheck(callerCtx context.Context, dbConn *db.DB) ([][]uint32, error) { + ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) + defer cancelRunBackfill() + + var windows [][]uint32 + lastWrittenSeq, err := getLastestLedgerNumInSqliteDB(ctx, dbConn) + if err != nil { + return nil, fmt.Errorf("could not get latest ledger sequence from DB: %w", err) + } + + return windows, nil +} + +// Gets the latest ledger number stored in the local Sqlite DB +func getLastestLedgerNumInSqliteDB(callerCtx context.Context, dbConn *db.DB) (uint32, error) { + ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) + defer cancelRunBackfill() + + seq, err := db.NewLedgerReader(dbConn).GetLatestLedgerSequence(ctx) + if err == db.ErrEmptyDB { + return 0, nil + } else if err != nil { + return 0, fmt.Errorf("could not get latest ledger sequence from DB: %w", err) + } + return seq, nil +} + +// Gets the latest ledger number stored in the cloud Datastore/datalake +func getLatestLedgerNumInCDP(callerCtx context.Context, ds datastore.DataStore) (uint32, error) { + ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) + defer cancelRunBackfill() + + seq, err := datastore.FindLatestLedgerSequence(ctx, ds) + if err != nil { + return 0, fmt.Errorf("could not get latest ledger sequence from datastore: %w", err) + } + return seq, nil +} + +type DatastoreInfo struct { + Ds datastore.DataStore + Schema datastore.DataStoreSchema + Config datastore.DataStoreConfig +} From 
e27dee695b4db542cc2de656b69d72c7b643f277 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 16 Dec 2025 18:18:34 -0500 Subject: [PATCH 06/72] completed structure without backfilling logic --- cmd/stellar-rpc/internal/ingest/backfill.go | 141 ++++++++++++-------- 1 file changed, 88 insertions(+), 53 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 5e514776..c5e4f74c 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -15,56 +15,120 @@ import ( const ( // OneDayOfLedgers is (roughly) a 24 hour window of ledgers. - OneDayOfLedgers = 17280 - SevenDayOfLedgers = OneDayOfLedgers * 7 + OneDayOfLedgers = config.OneDayOfLedgers + SevenDayOfLedgers = config.OneDayOfLedgers * 7 + // Number of ledgers to read/write at a time during backfill + ChunkSize uint32 = 6400 ) // This function backfills the local database with n ledgers from the datastore // It is called by daemon.go if cfg.Backfill is true func RunBackfill(cfg *config.Config, logger *supportlog.Entry, dbConn *db.DB, dsInfo DatastoreInfo) error { + logger.Infof("Beginning backfill process") var ( - n_backfill uint32 = cfg.HistoryRetentionWindow - chunk_size uint32 = 6400 // number of ledgers to process in one batch - DBPath string = cfg.SQLiteDBPath + ctx context.Context = context.Background() + + nBackfill uint32 = cfg.HistoryRetentionWindow + localDbPath string = cfg.SQLiteDBPath + localDbReader db.LedgerReader = db.NewLedgerReader(dbConn) ) - ctx := context.Background() - logger.Infof("Creating BufferedStorageBackend") + logger.Infof("Creating LedgerBackend") backend, err := makeBackend(dsInfo) if err != nil { - return fmt.Errorf("could not create storage backend: %w", err) + return fmt.Errorf("could not create ledger backend: %w", err) } defer backend.Close() - logger.Infof("Starting backfill precheck for inserting %d ledgers into the database at %s", n_backfill, DBPath) - writtenWindows, err := runBackfillPrecheck(ctx, dbConn) + // Determine what ledgers have been written to local DB + ledgerRange, err := localDbReader.GetLedgerRange(ctx) + if err != db.ErrEmptyDB && err != nil { + return fmt.Errorf("error getting ledger range from local DB: %w", err) + } + maxWrittenLedger, minWrittenLedger := ledgerRange.LastLedger.Sequence, ledgerRange.FirstLedger.Sequence + + // Phase 0: precheck to ensure no gaps in local DB + if err != db.ErrEmptyDB { + logger.Infof("Starting precheck for backfilling the database at %s", localDbPath) + err := runDbVerify(ctx, localDbReader, minWrittenLedger, maxWrittenLedger) + if err != nil { + return fmt.Errorf("backfill precheck failed: %w", err) + } + } else { + logger.Infof("Local DB is empty, skipping precheck") + } + logger.Infof("Precheck passed, starting backfill of %d ledgers into the database at %s", nBackfill, localDbPath) + + // Phase 1: backfill backwards towards oldest ledger to put in DB + currentTipLedger, err := getLatestLedgerNumInCDP(ctx, dsInfo.Ds) + if err != nil { + return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) + } + lBound, rBound := currentTipLedger-nBackfill, min(minWrittenLedger, currentTipLedger) + err = runBackfillBackwards(dbConn, dsInfo, lBound, rBound) + if err != nil { + return fmt.Errorf("backfill backwards failed: %w", err) + } + + // Phase 2: backfill forwards towards latest ledger to put in DB + logger.Infof("Backward backfill of old ledgers complete, starting forward backfill to current tip") + 
currentTipLedger, err = getLatestLedgerNumInCDP(ctx, dsInfo.Ds) + if err != nil { + return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) + } + lBound, rBound = maxWrittenLedger+1, currentTipLedger + err = runBackfillForwards(dbConn, dsInfo, lBound, rBound) if err != nil { - return fmt.Errorf("backfill precheck failed: %w", err) + return fmt.Errorf("backfill forwards failed: %w", err) } - if len(writtenWindows) > 0 { - logger.Infof("Backfill precheck found %d already written ledger windows, skipping them", len(writtenWindows)) + + // Phase 3: verify no gaps in local DB after backfill + logger.Infof("Forward backfill complete, starting post-backfill verification") + err = runDbVerify(ctx, localDbReader, currentTipLedger-nBackfill, currentTipLedger-1) + if err != nil { + return fmt.Errorf("post-backfill verification failed: %w", err) } + logger.Infof("Backfill process complete") + + return nil +} + +// Checks to ensure state of local DB is acceptable for backfilling +func runDbVerify(callerCtx context.Context, ledgerReader db.LedgerReader, minLedgerSeq uint32, maxLedgerSeq uint32) error { + ctx, cancelPrecheck := context.WithTimeout(callerCtx, 5*time.Second) + defer cancelPrecheck() - logger.Infof("Starting backfill of %d ledgers into the database at %s", n_backfill, DBPath) + tx, err := db.LedgerReader.NewTx(ledgerReader, ctx) + defer tx.Done() - // Determine current tip of the datastore - startLedgerNum, err := getLatestLedgerNumInCDP(ctx, dsInfo.Ds) + chunks, err := tx.BatchGetLedgers(ctx, minLedgerSeq, maxLedgerSeq) if err != nil { - return err + return fmt.Errorf("db verify: could not batch get ledgers from DB: %w", err) } - for { - // for i, chunk in chunks - // fetch ledgers in chunk - // write ledgers to DB - // if i % COMMIT_EVERY_N_CHUNKS == 0: - // commit transaction - break + expectedSeq := minLedgerSeq + for _, chunk := range chunks { + if seq := uint32(chunk.Header.Header.LedgerSeq); seq != expectedSeq { + return fmt.Errorf("db verify: gap detected in local DB: expected seq %d, got %d", expectedSeq, seq) + } + expectedSeq++ } return nil } +// Backfills the local DB with ledgers in [lBound, rBound) from the cloud datastore +// Used to fill local DB backwards towards older ledgers +func runBackfillBackwards(dbConn *db.DB, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { + return nil +} + +// Backfills the local DB with ledgers in [lBound, rBound) from the cloud datastore +// Used to fill local DB backwards towards the current ledger tip +func runBackfillForwards(dbConn *db.DB, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { + return nil +} + func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { backend, err := ledgerbackend.NewBufferedStorageBackend( ledgerbackend.BufferedStorageBackendConfig{ @@ -82,35 +146,6 @@ func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { return backend, nil } -// Checks to ensure state of local DB is acceptable for backfilling -// If so, -func runBackfillPrecheck(callerCtx context.Context, dbConn *db.DB) ([][]uint32, error) { - ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) - defer cancelRunBackfill() - - var windows [][]uint32 - lastWrittenSeq, err := getLastestLedgerNumInSqliteDB(ctx, dbConn) - if err != nil { - return nil, fmt.Errorf("could not get latest ledger sequence from DB: %w", err) - } - - return windows, nil -} - -// Gets the latest ledger number stored in the local Sqlite DB -func 
getLastestLedgerNumInSqliteDB(callerCtx context.Context, dbConn *db.DB) (uint32, error) { - ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) - defer cancelRunBackfill() - - seq, err := db.NewLedgerReader(dbConn).GetLatestLedgerSequence(ctx) - if err == db.ErrEmptyDB { - return 0, nil - } else if err != nil { - return 0, fmt.Errorf("could not get latest ledger sequence from DB: %w", err) - } - return seq, nil -} - // Gets the latest ledger number stored in the cloud Datastore/datalake func getLatestLedgerNumInCDP(callerCtx context.Context, ds datastore.DataStore) (uint32, error) { ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) From 11da7030396baac56a0e3da31b85225e5d80bab2 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 17 Dec 2025 11:58:27 -0500 Subject: [PATCH 07/72] completed backfilling logic, untested code fully written --- cmd/stellar-rpc/internal/daemon/daemon.go | 15 +++- cmd/stellar-rpc/internal/ingest/backfill.go | 99 ++++++++++++++++----- 2 files changed, 91 insertions(+), 23 deletions(-) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index c62c8646..88f7e5b4 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -190,6 +190,10 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { feewindows := daemon.mustInitializeStorage(cfg) + daemon.ingestService = createIngestService(cfg, logger, daemon, feewindows, historyArchive) + daemon.preflightWorkerPool = createPreflightWorkerPool(cfg, logger, daemon) + daemon.jsonRPCHandler = createJSONRPCHandler(cfg, logger, daemon, feewindows) + if cfg.ServeLedgersFromDatastore { daemon.dataStore, daemon.dataStoreSchema = mustCreateDataStore(cfg, logger) } @@ -197,6 +201,14 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { err := ingest.RunBackfill(cfg, logger, daemon.db, + db.NewReadWriter( + logger, + daemon.db, + daemon, + maxLedgerEntryWriteBatchSize, + cfg.HistoryRetentionWindow, + cfg.NetworkPassphrase, + ), ingest.DatastoreInfo{ Ds: daemon.dataStore, Schema: daemon.dataStoreSchema, @@ -206,9 +218,6 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { logger.WithError(err).Fatal("failed to backfill ledgers") } } - daemon.ingestService = createIngestService(cfg, logger, daemon, feewindows, historyArchive) - daemon.preflightWorkerPool = createPreflightWorkerPool(cfg, logger, daemon) - daemon.jsonRPCHandler = createJSONRPCHandler(cfg, logger, daemon, feewindows) daemon.setupHTTPServers(cfg) daemon.registerMetrics() diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index c5e4f74c..e676ba79 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -8,6 +8,7 @@ import ( "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/support/datastore" supportlog "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/config" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/db" @@ -23,14 +24,14 @@ const ( // This function backfills the local database with n ledgers from the datastore // It is called by daemon.go if cfg.Backfill is true -func RunBackfill(cfg *config.Config, logger *supportlog.Entry, dbConn *db.DB, dsInfo DatastoreInfo) error { +func RunBackfill(cfg *config.Config, logger *supportlog.Entry, 
localDbConn *db.DB, localDbRW db.ReadWriter, dsInfo DatastoreInfo) error { logger.Infof("Beginning backfill process") var ( ctx context.Context = context.Background() nBackfill uint32 = cfg.HistoryRetentionWindow localDbPath string = cfg.SQLiteDBPath - localDbReader db.LedgerReader = db.NewLedgerReader(dbConn) + localDbReader db.LedgerReader = db.NewLedgerReader(localDbConn) ) logger.Infof("Creating LedgerBackend") @@ -38,11 +39,12 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, dbConn *db.DB, ds if err != nil { return fmt.Errorf("could not create ledger backend: %w", err) } + dsInfo.backend = backend defer backend.Close() // Determine what ledgers have been written to local DB ledgerRange, err := localDbReader.GetLedgerRange(ctx) - if err != db.ErrEmptyDB && err != nil { + if err != nil && err != db.ErrEmptyDB { return fmt.Errorf("error getting ledger range from local DB: %w", err) } maxWrittenLedger, minWrittenLedger := ledgerRange.LastLedger.Sequence, ledgerRange.FirstLedger.Sequence @@ -50,7 +52,7 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, dbConn *db.DB, ds // Phase 0: precheck to ensure no gaps in local DB if err != db.ErrEmptyDB { logger.Infof("Starting precheck for backfilling the database at %s", localDbPath) - err := runDbVerify(ctx, localDbReader, minWrittenLedger, maxWrittenLedger) + err := verifyDbGapless(ctx, localDbReader, minWrittenLedger, maxWrittenLedger) if err != nil { return fmt.Errorf("backfill precheck failed: %w", err) } @@ -60,31 +62,31 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, dbConn *db.DB, ds logger.Infof("Precheck passed, starting backfill of %d ledgers into the database at %s", nBackfill, localDbPath) // Phase 1: backfill backwards towards oldest ledger to put in DB - currentTipLedger, err := getLatestLedgerNumInCDP(ctx, dsInfo.Ds) + currentTipLedger, err := getLatestLedgerNumInCDP(ctx, backend) if err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } lBound, rBound := currentTipLedger-nBackfill, min(minWrittenLedger, currentTipLedger) - err = runBackfillBackwards(dbConn, dsInfo, lBound, rBound) + err = runBackfillBackwards(ctx, localDbRW, dsInfo, lBound, rBound) if err != nil { return fmt.Errorf("backfill backwards failed: %w", err) } // Phase 2: backfill forwards towards latest ledger to put in DB logger.Infof("Backward backfill of old ledgers complete, starting forward backfill to current tip") - currentTipLedger, err = getLatestLedgerNumInCDP(ctx, dsInfo.Ds) + currentTipLedger, err = getLatestLedgerNumInCDP(ctx, backend) if err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } lBound, rBound = maxWrittenLedger+1, currentTipLedger - err = runBackfillForwards(dbConn, dsInfo, lBound, rBound) + err = runBackfillForwards(ctx, localDbRW, dsInfo, lBound, rBound) if err != nil { return fmt.Errorf("backfill forwards failed: %w", err) } // Phase 3: verify no gaps in local DB after backfill logger.Infof("Forward backfill complete, starting post-backfill verification") - err = runDbVerify(ctx, localDbReader, currentTipLedger-nBackfill, currentTipLedger-1) + err = verifyDbGapless(ctx, localDbReader, currentTipLedger-nBackfill, currentTipLedger-1) if err != nil { return fmt.Errorf("post-backfill verification failed: %w", err) } @@ -94,11 +96,11 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, dbConn *db.DB, ds } // Checks to ensure state of local DB is acceptable for backfilling -func 
runDbVerify(callerCtx context.Context, ledgerReader db.LedgerReader, minLedgerSeq uint32, maxLedgerSeq uint32) error { +func verifyDbGapless(callerCtx context.Context, ledgerReader db.LedgerReader, minLedgerSeq uint32, maxLedgerSeq uint32) error { ctx, cancelPrecheck := context.WithTimeout(callerCtx, 5*time.Second) defer cancelPrecheck() - tx, err := db.LedgerReader.NewTx(ledgerReader, ctx) + tx, err := ledgerReader.NewTx(ctx) defer tx.Done() chunks, err := tx.BatchGetLedgers(ctx, minLedgerSeq, maxLedgerSeq) @@ -117,15 +119,71 @@ func runDbVerify(callerCtx context.Context, ledgerReader db.LedgerReader, minLed return nil } -// Backfills the local DB with ledgers in [lBound, rBound) from the cloud datastore +// Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers -func runBackfillBackwards(dbConn *db.DB, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { +// Returns the rightmost ledger +func runBackfillBackwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { + for rChunkBound := rBound; rChunkBound >= lBound; rChunkBound -= ChunkSize { + lChunkBound := max(lBound, rChunkBound-ChunkSize+1) + fmt.Printf("REMOVE: Backfilling ledgers [%d, %d)\n", lChunkBound, rChunkBound) + backfillRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) + if err := dsInfo.backend.PrepareRange(callerCtx, backfillRange); err != nil { + return fmt.Errorf("couldn't prepare range [%d, %d): %w", lChunkBound, rChunkBound, err) + } + + tx, err := ledgerRW.NewTx(callerCtx) + if err != nil { + return fmt.Errorf("couldn't create local db write tx: %w", err) + } + defer tx.Rollback() + // backendRpcDatastore := rpcdatastore.LedgerBackendFactory(dsInfo.backend.) 
+ // ledgers, err := rpcdatastore.LedgerReader.GetLedgers(dsInfo.backend, ctx, lChunkBound, rChunkBound) + var ledger xdr.LedgerCloseMeta + for seq := lChunkBound; seq < rChunkBound; seq++ { + // Fetch ledger from backend + ledger, err = dsInfo.backend.GetLedger(callerCtx, seq) + if err != nil { + return fmt.Errorf("couldn't get ledger %d from backend: %w", seq, err) + } + if err := tx.LedgerWriter().InsertLedger(ledger); err != nil { + return fmt.Errorf("couldn't write ledger %d to local db: %w", seq, err) + } + } + tx.Commit(ledger, nil) + } return nil } -// Backfills the local DB with ledgers in [lBound, rBound) from the cloud datastore +// Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards the current ledger tip -func runBackfillForwards(dbConn *db.DB, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { +func runBackfillForwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { + for lChunkBound := lBound; lChunkBound <= rBound; lChunkBound += ChunkSize { + rChunkBound := min(rBound, lChunkBound+ChunkSize-1) + fmt.Printf("REMOVE: Backfilling ledgers [%d, %d)\n", lChunkBound, rChunkBound) + backfillRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) + if err := dsInfo.backend.PrepareRange(callerCtx, backfillRange); err != nil { + return fmt.Errorf("couldn't prepare range [%d, %d): %w", lChunkBound, rChunkBound, err) + } + + tx, err := ledgerRW.NewTx(callerCtx) + if err != nil { + return fmt.Errorf("couldn't create local db write tx: %w", err) + } + defer tx.Rollback() + + var ledger xdr.LedgerCloseMeta + for seq := lChunkBound; seq < rChunkBound; seq++ { + // Fetch ledger from backend + ledger, err = dsInfo.backend.GetLedger(callerCtx, seq) + if err != nil { + return fmt.Errorf("couldn't get ledger %d from backend: %w", seq, err) + } + if err := tx.LedgerWriter().InsertLedger(ledger); err != nil { + return fmt.Errorf("couldn't write ledger %d to local db: %w", seq, err) + } + } + tx.Commit(ledger, nil) + } return nil } @@ -147,11 +205,11 @@ func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { } // Gets the latest ledger number stored in the cloud Datastore/datalake -func getLatestLedgerNumInCDP(callerCtx context.Context, ds datastore.DataStore) (uint32, error) { +func getLatestLedgerNumInCDP(callerCtx context.Context, backend ledgerbackend.LedgerBackend) (uint32, error) { ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) defer cancelRunBackfill() - seq, err := datastore.FindLatestLedgerSequence(ctx, ds) + seq, err := backend.GetLatestLedgerSequence(ctx) if err != nil { return 0, fmt.Errorf("could not get latest ledger sequence from datastore: %w", err) } @@ -159,7 +217,8 @@ func getLatestLedgerNumInCDP(callerCtx context.Context, ds datastore.DataStore) } type DatastoreInfo struct { - Ds datastore.DataStore - Schema datastore.DataStoreSchema - Config datastore.DataStoreConfig + Ds datastore.DataStore + Schema datastore.DataStoreSchema + Config datastore.DataStoreConfig + backend ledgerbackend.LedgerBackend } From a8555011d31d82c5dd0996a5c41ae053895f4bbe Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 17 Dec 2025 13:10:01 -0500 Subject: [PATCH 08/72] fixed several off-by-one issues, improved error handling --- cmd/stellar-rpc/internal/ingest/backfill.go | 59 ++++++++++++++------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git 
a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index e676ba79..f7ed1711 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -27,7 +27,8 @@ const ( func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.DB, localDbRW db.ReadWriter, dsInfo DatastoreInfo) error { logger.Infof("Beginning backfill process") var ( - ctx context.Context = context.Background() + ctx context.Context = context.Background() + currentTipLedger uint32 nBackfill uint32 = cfg.HistoryRetentionWindow localDbPath string = cfg.SQLiteDBPath @@ -62,11 +63,10 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D logger.Infof("Precheck passed, starting backfill of %d ledgers into the database at %s", nBackfill, localDbPath) // Phase 1: backfill backwards towards oldest ledger to put in DB - currentTipLedger, err := getLatestLedgerNumInCDP(ctx, backend) - if err != nil { + if err := getLatestLedgerNumInCDP(ctx, backend, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } - lBound, rBound := currentTipLedger-nBackfill, min(minWrittenLedger, currentTipLedger) + lBound, rBound := max(currentTipLedger-nBackfill+1, 1), min(minWrittenLedger, currentTipLedger) err = runBackfillBackwards(ctx, localDbRW, dsInfo, lBound, rBound) if err != nil { return fmt.Errorf("backfill backwards failed: %w", err) @@ -74,8 +74,7 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D // Phase 2: backfill forwards towards latest ledger to put in DB logger.Infof("Backward backfill of old ledgers complete, starting forward backfill to current tip") - currentTipLedger, err = getLatestLedgerNumInCDP(ctx, backend) - if err != nil { + if err = getLatestLedgerNumInCDP(ctx, backend, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } lBound, rBound = maxWrittenLedger+1, currentTipLedger @@ -86,8 +85,11 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D // Phase 3: verify no gaps in local DB after backfill logger.Infof("Forward backfill complete, starting post-backfill verification") - err = verifyDbGapless(ctx, localDbReader, currentTipLedger-nBackfill, currentTipLedger-1) - if err != nil { + if err = getLatestLedgerNumInCDP(ctx, backend, ¤tTipLedger); err != nil { + return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) + } + startSeq, endSeq := max(currentTipLedger-nBackfill+1, 1), currentTipLedger + if err = verifyDbGapless(ctx, localDbReader, startSeq, endSeq); err != nil { return fmt.Errorf("post-backfill verification failed: %w", err) } logger.Infof("Backfill process complete") @@ -101,6 +103,9 @@ func verifyDbGapless(callerCtx context.Context, ledgerReader db.LedgerReader, mi defer cancelPrecheck() tx, err := ledgerReader.NewTx(ctx) + if err != nil { + return fmt.Errorf("db verify: failed to begin read transaction: %w", err) + } defer tx.Done() chunks, err := tx.BatchGetLedgers(ctx, minLedgerSeq, maxLedgerSeq) @@ -125,10 +130,10 @@ func verifyDbGapless(callerCtx context.Context, ledgerReader db.LedgerReader, mi func runBackfillBackwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { for rChunkBound := rBound; rChunkBound >= lBound; rChunkBound -= ChunkSize { lChunkBound := max(lBound, rChunkBound-ChunkSize+1) - 
fmt.Printf("REMOVE: Backfilling ledgers [%d, %d)\n", lChunkBound, rChunkBound) + fmt.Printf("REMOVE: Backfilling ledgers [%d, %d]\n", lChunkBound, rChunkBound) backfillRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) if err := dsInfo.backend.PrepareRange(callerCtx, backfillRange); err != nil { - return fmt.Errorf("couldn't prepare range [%d, %d): %w", lChunkBound, rChunkBound, err) + return fmt.Errorf("couldn't prepare range [%d, %d]: %w", lChunkBound, rChunkBound, err) } tx, err := ledgerRW.NewTx(callerCtx) @@ -136,10 +141,9 @@ func runBackfillBackwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsI return fmt.Errorf("couldn't create local db write tx: %w", err) } defer tx.Rollback() - // backendRpcDatastore := rpcdatastore.LedgerBackendFactory(dsInfo.backend.) - // ledgers, err := rpcdatastore.LedgerReader.GetLedgers(dsInfo.backend, ctx, lChunkBound, rChunkBound) var ledger xdr.LedgerCloseMeta - for seq := lChunkBound; seq < rChunkBound; seq++ { + processed := false + for seq := lChunkBound; seq <= rChunkBound; seq++ { // Fetch ledger from backend ledger, err = dsInfo.backend.GetLedger(callerCtx, seq) if err != nil { @@ -148,8 +152,14 @@ func runBackfillBackwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsI if err := tx.LedgerWriter().InsertLedger(ledger); err != nil { return fmt.Errorf("couldn't write ledger %d to local db: %w", seq, err) } + processed = true + } + if processed { + if err := tx.Commit(ledger, nil); err != nil { + return fmt.Errorf("couldn't commit range [%d, %d]: %w", lChunkBound, rChunkBound, err) + } + fmt.Printf("REMOVE: Committed ledgers [%d, %d]\n", lChunkBound, rChunkBound) } - tx.Commit(ledger, nil) } return nil } @@ -162,7 +172,7 @@ func runBackfillForwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsIn fmt.Printf("REMOVE: Backfilling ledgers [%d, %d)\n", lChunkBound, rChunkBound) backfillRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) if err := dsInfo.backend.PrepareRange(callerCtx, backfillRange); err != nil { - return fmt.Errorf("couldn't prepare range [%d, %d): %w", lChunkBound, rChunkBound, err) + return fmt.Errorf("couldn't prepare range [%d, %d]: %w", lChunkBound, rChunkBound, err) } tx, err := ledgerRW.NewTx(callerCtx) @@ -172,7 +182,8 @@ func runBackfillForwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsIn defer tx.Rollback() var ledger xdr.LedgerCloseMeta - for seq := lChunkBound; seq < rChunkBound; seq++ { + processed := false + for seq := lChunkBound; seq <= rChunkBound; seq++ { // Fetch ledger from backend ledger, err = dsInfo.backend.GetLedger(callerCtx, seq) if err != nil { @@ -181,8 +192,14 @@ func runBackfillForwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsIn if err := tx.LedgerWriter().InsertLedger(ledger); err != nil { return fmt.Errorf("couldn't write ledger %d to local db: %w", seq, err) } + processed = true + } + if processed { + if err := tx.Commit(ledger, nil); err != nil { + return fmt.Errorf("couldn't commit range [%d, %d]: %w", lChunkBound, rChunkBound, err) + } + fmt.Printf("REMOVE: Committed ledgers [%d, %d]\n", lChunkBound, rChunkBound) } - tx.Commit(ledger, nil) } return nil } @@ -205,15 +222,17 @@ func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { } // Gets the latest ledger number stored in the cloud Datastore/datalake -func getLatestLedgerNumInCDP(callerCtx context.Context, backend ledgerbackend.LedgerBackend) (uint32, error) { +// Stores it in tip pointer +func getLatestLedgerNumInCDP(callerCtx context.Context, backend 
ledgerbackend.LedgerBackend, tip *uint32) error {
 	ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second)
 	defer cancelRunBackfill()
 
 	seq, err := backend.GetLatestLedgerSequence(ctx)
 	if err != nil {
-		return 0, fmt.Errorf("could not get latest ledger sequence from datastore: %w", err)
+		return fmt.Errorf("could not get latest ledger sequence from datastore: %w", err)
 	}
-	return seq, nil
+	*tip = seq
+	return nil
 }
 
 type DatastoreInfo struct {

From bb49dcc8bba708fbf7d9ac4f302839c8e81823c9 Mon Sep 17 00:00:00 2001
From: Christian Jonas
Date: Wed, 17 Dec 2025 18:10:42 -0500
Subject: [PATCH 09/72] refactored code, discovered monotonicity contract in storage backend

---
 cmd/stellar-rpc/internal/daemon/daemon.go   |  35 ++---
 cmd/stellar-rpc/internal/ingest/backfill.go | 157 +++++++++++---------
 2 files changed, 104 insertions(+), 88 deletions(-)

diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go
index 88f7e5b4..14a08104 100644
--- a/cmd/stellar-rpc/internal/daemon/daemon.go
+++ b/cmd/stellar-rpc/internal/daemon/daemon.go
@@ -190,7 +190,16 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon {
 	feewindows := daemon.mustInitializeStorage(cfg)
 
-	daemon.ingestService = createIngestService(cfg, logger, daemon, feewindows, historyArchive)
+	// Create the read-writer once and reuse in ingest service/backfill
+	rw := db.NewReadWriter(
+		logger,
+		daemon.db,
+		daemon,
+		maxLedgerEntryWriteBatchSize,
+		cfg.HistoryRetentionWindow,
+		cfg.NetworkPassphrase,
+	)
+	daemon.ingestService = createIngestService(cfg, logger, daemon, feewindows, historyArchive, rw)
 	daemon.preflightWorkerPool = createPreflightWorkerPool(cfg, logger, daemon)
 	daemon.jsonRPCHandler = createJSONRPCHandler(cfg, logger, daemon, feewindows)
@@ -199,16 +208,9 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon {
 	}
 	if cfg.Backfill {
 		err := ingest.RunBackfill(cfg,
-			logger,
+			logger.WithField("subservice", "backfill"),
 			daemon.db,
-			db.NewReadWriter(
-				logger,
-				daemon.db,
-				daemon,
-				maxLedgerEntryWriteBatchSize,
-				cfg.HistoryRetentionWindow,
-				cfg.NetworkPassphrase,
-			),
+			rw,
 			ingest.DatastoreInfo{
 				Ds:     daemon.dataStore,
 				Schema: daemon.dataStoreSchema,
@@ -308,22 +310,15 @@ func createHighperfStellarCoreClient(cfg *config.Config) interfaces.FastCoreClie
 }
 
 func createIngestService(cfg *config.Config, logger *supportlog.Entry, daemon *Daemon,
-	feewindows *feewindow.FeeWindows, historyArchive *historyarchive.ArchiveInterface,
+	feewindows *feewindow.FeeWindows, historyArchive *historyarchive.ArchiveInterface, rw db.ReadWriter,
 ) *ingest.Service {
 	onIngestionRetry := func(err error, _ time.Duration) {
 		logger.WithError(err).Error("could not run ingestion. 
Retrying") } return ingest.NewService(ingest.Config{ - Logger: logger, - DB: db.NewReadWriter( - logger, - daemon.db, - daemon, - maxLedgerEntryWriteBatchSize, - cfg.HistoryRetentionWindow, - cfg.NetworkPassphrase, - ), + Logger: logger, + DB: rw, NetworkPassPhrase: cfg.NetworkPassphrase, Archive: *historyArchive, LedgerBackend: daemon.core, diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index f7ed1711..b22bb92f 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -19,7 +19,7 @@ const ( OneDayOfLedgers = config.OneDayOfLedgers SevenDayOfLedgers = config.OneDayOfLedgers * 7 // Number of ledgers to read/write at a time during backfill - ChunkSize uint32 = 6400 + ChunkSize uint32 = OneDayOfLedgers / 4 // 6 hours. Takes X minutes to process ) // This function backfills the local database with n ledgers from the datastore @@ -29,63 +29,74 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D var ( ctx context.Context = context.Background() currentTipLedger uint32 + dbIsEmpty bool = false nBackfill uint32 = cfg.HistoryRetentionWindow localDbPath string = cfg.SQLiteDBPath localDbReader db.LedgerReader = db.NewLedgerReader(localDbConn) ) - logger.Infof("Creating LedgerBackend") - backend, err := makeBackend(dsInfo) - if err != nil { + logger.Infof("Creating and setting LedgerBackend") + if err := makeBackend(&dsInfo); err != nil { return fmt.Errorf("could not create ledger backend: %w", err) } - dsInfo.backend = backend - defer backend.Close() + defer dsInfo.backend.Close() // Determine what ledgers have been written to local DB ledgerRange, err := localDbReader.GetLedgerRange(ctx) if err != nil && err != db.ErrEmptyDB { return fmt.Errorf("error getting ledger range from local DB: %w", err) + } else if err == db.ErrEmptyDB { + dbIsEmpty = true } - maxWrittenLedger, minWrittenLedger := ledgerRange.LastLedger.Sequence, ledgerRange.FirstLedger.Sequence + maxWrittenLedger, minWrittenLedger := ledgerRange.LastLedger.Sequence, max(ledgerRange.FirstLedger.Sequence, 1) // Phase 0: precheck to ensure no gaps in local DB - if err != db.ErrEmptyDB { - logger.Infof("Starting precheck for backfilling the database at %s", localDbPath) - err := verifyDbGapless(ctx, localDbReader, minWrittenLedger, maxWrittenLedger) - if err != nil { + if !dbIsEmpty && (maxWrittenLedger >= minWrittenLedger) { + logger.Infof("Starting precheck for backfilling the database at %s, phase 1 of 4", localDbPath) + if err = verifyDbGapless(ctx, localDbReader, minWrittenLedger, maxWrittenLedger); err != nil { return fmt.Errorf("backfill precheck failed: %w", err) } } else { logger.Infof("Local DB is empty, skipping precheck") } - logger.Infof("Precheck passed, starting backfill of %d ledgers into the database at %s", nBackfill, localDbPath) + logger.Infof("Precheck passed! 
Starting backfill process, phase 2 of 4") // Phase 1: backfill backwards towards oldest ledger to put in DB - if err := getLatestLedgerNumInCDP(ctx, backend, ¤tTipLedger); err != nil { + if err := getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } - lBound, rBound := max(currentTipLedger-nBackfill+1, 1), min(minWrittenLedger, currentTipLedger) - err = runBackfillBackwards(ctx, localDbRW, dsInfo, lBound, rBound) - if err != nil { + logger.Debugf("Current tip ledger in cloud datastore is %d", currentTipLedger) + lBound := max(currentTipLedger-nBackfill+1, 1) + + var rBound uint32 + if dbIsEmpty { + rBound = currentTipLedger + } else { + rBound = minWrittenLedger - 1 + } + // min(minWrittenLedger, currentTipLedger)-1 + if err = runBackfillBackwards(ctx, logger, localDbRW, dsInfo, lBound, rBound); err != nil { return fmt.Errorf("backfill backwards failed: %w", err) } // Phase 2: backfill forwards towards latest ledger to put in DB - logger.Infof("Backward backfill of old ledgers complete, starting forward backfill to current tip") - if err = getLatestLedgerNumInCDP(ctx, backend, ¤tTipLedger); err != nil { + logger.Infof("Backward backfill of old ledgers complete! Starting forward backfill to current tip, phase 3 of 4") + if err = getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } - lBound, rBound = maxWrittenLedger+1, currentTipLedger - err = runBackfillForwards(ctx, localDbRW, dsInfo, lBound, rBound) - if err != nil { + if dbIsEmpty { + lBound = max(currentTipLedger-nBackfill+1, 1) + } else { + lBound = maxWrittenLedger + 1 + } + if err = runBackfillForwards(ctx, logger, localDbRW, dsInfo, lBound, currentTipLedger); err != nil { return fmt.Errorf("backfill forwards failed: %w", err) } // Phase 3: verify no gaps in local DB after backfill logger.Infof("Forward backfill complete, starting post-backfill verification") - if err = getLatestLedgerNumInCDP(ctx, backend, ¤tTipLedger); err != nil { + if err = getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } startSeq, endSeq := max(currentTipLedger-nBackfill+1, 1), currentTipLedger @@ -121,16 +132,23 @@ func verifyDbGapless(callerCtx context.Context, ledgerReader db.LedgerReader, mi expectedSeq++ } + if expectedSeq--; expectedSeq != maxLedgerSeq { + return fmt.Errorf("db verify: missing ledgers at tail: ended at %d, expected %d", expectedSeq, maxLedgerSeq) + } + return nil } // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers // Returns the rightmost ledger -func runBackfillBackwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { - for rChunkBound := rBound; rChunkBound >= lBound; rChunkBound -= ChunkSize { +func runBackfillBackwards(callerCtx context.Context, logger *supportlog.Entry, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { + for rChunkBound := rBound; rChunkBound >= lBound; rChunkBound = max(lBound, rChunkBound-ChunkSize) { + if err := callerCtx.Err(); err != nil { + return err + } lChunkBound := max(lBound, rChunkBound-ChunkSize+1) - fmt.Printf("REMOVE: Backfilling ledgers [%d, %d]\n", lChunkBound, rChunkBound) + logger.Infof("Backwards backfill: backfilling ledgers 
[%d, %d]\n", lChunkBound, rChunkBound) backfillRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) if err := dsInfo.backend.PrepareRange(callerCtx, backfillRange); err != nil { return fmt.Errorf("couldn't prepare range [%d, %d]: %w", lChunkBound, rChunkBound, err) @@ -141,35 +159,24 @@ func runBackfillBackwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsI return fmt.Errorf("couldn't create local db write tx: %w", err) } defer tx.Rollback() - var ledger xdr.LedgerCloseMeta - processed := false - for seq := lChunkBound; seq <= rChunkBound; seq++ { - // Fetch ledger from backend - ledger, err = dsInfo.backend.GetLedger(callerCtx, seq) - if err != nil { - return fmt.Errorf("couldn't get ledger %d from backend: %w", seq, err) - } - if err := tx.LedgerWriter().InsertLedger(ledger); err != nil { - return fmt.Errorf("couldn't write ledger %d to local db: %w", seq, err) - } - processed = true - } - if processed { - if err := tx.Commit(ledger, nil); err != nil { - return fmt.Errorf("couldn't commit range [%d, %d]: %w", lChunkBound, rChunkBound, err) - } - fmt.Printf("REMOVE: Committed ledgers [%d, %d]\n", lChunkBound, rChunkBound) + + if err := fillChunk(callerCtx, dsInfo, tx, lChunkBound, rChunkBound); err != nil { + return fmt.Errorf("couldn't fill chunk [%d, %d]: %w", lChunkBound, rChunkBound, err) } + logger.Infof("Backwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) } return nil } // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards the current ledger tip -func runBackfillForwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { +func runBackfillForwards(callerCtx context.Context, logger *supportlog.Entry, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { for lChunkBound := lBound; lChunkBound <= rBound; lChunkBound += ChunkSize { + if err := callerCtx.Err(); err != nil { + return err + } rChunkBound := min(rBound, lChunkBound+ChunkSize-1) - fmt.Printf("REMOVE: Backfilling ledgers [%d, %d)\n", lChunkBound, rChunkBound) + logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) backfillRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) if err := dsInfo.backend.PrepareRange(callerCtx, backfillRange); err != nil { return fmt.Errorf("couldn't prepare range [%d, %d]: %w", lChunkBound, rChunkBound, err) @@ -179,32 +186,45 @@ func runBackfillForwards(callerCtx context.Context, ledgerRW db.ReadWriter, dsIn if err != nil { return fmt.Errorf("couldn't create local db write tx: %w", err) } - defer tx.Rollback() - var ledger xdr.LedgerCloseMeta - processed := false - for seq := lChunkBound; seq <= rChunkBound; seq++ { - // Fetch ledger from backend - ledger, err = dsInfo.backend.GetLedger(callerCtx, seq) - if err != nil { - return fmt.Errorf("couldn't get ledger %d from backend: %w", seq, err) - } - if err := tx.LedgerWriter().InsertLedger(ledger); err != nil { - return fmt.Errorf("couldn't write ledger %d to local db: %w", seq, err) - } - processed = true + if err := fillChunk(callerCtx, dsInfo, tx, lChunkBound, rChunkBound); err != nil { + return fmt.Errorf("couldn't fill chunk [%d, %d]: %w", lChunkBound, rChunkBound, err) + } + logger.Infof("Forwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) + + } + return nil +} + +func fillChunk(callerCtx context.Context, dsInfo DatastoreInfo, tx db.WriteTx, left uint32, right uint32) error { + 
var ledger xdr.LedgerCloseMeta + var err error + processed := false + for seq := left; seq <= right; seq++ { + // Fetch ledger from backend + ledger, err = dsInfo.backend.GetLedger(callerCtx, seq) + if err != nil { + _ = tx.Rollback() + return fmt.Errorf("couldn't get ledger %d from backend: %w", seq, err) } - if processed { - if err := tx.Commit(ledger, nil); err != nil { - return fmt.Errorf("couldn't commit range [%d, %d]: %w", lChunkBound, rChunkBound, err) - } - fmt.Printf("REMOVE: Committed ledgers [%d, %d]\n", lChunkBound, rChunkBound) + if err = tx.LedgerWriter().InsertLedger(ledger); err != nil { + _ = tx.Rollback() + return fmt.Errorf("couldn't write ledger %d to local db: %w", seq, err) + } + processed = true + } + if processed { + if err := tx.Commit(ledger, nil); err != nil { + _ = tx.Rollback() + return fmt.Errorf("couldn't commit range [%d, %d]: %w", left, right, err) } } return nil } -func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { +// Creates a buffered storage backend for the given datastore +// Sets it in the DatastoreInfo struct +func makeBackend(dsInfo *DatastoreInfo) error { backend, err := ledgerbackend.NewBufferedStorageBackend( ledgerbackend.BufferedStorageBackendConfig{ BufferSize: 1024, @@ -216,18 +236,19 @@ func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { dsInfo.Schema, ) if err != nil { - return nil, err + return err } - return backend, nil + dsInfo.backend = backend + return nil } // Gets the latest ledger number stored in the cloud Datastore/datalake // Stores it in tip pointer -func getLatestLedgerNumInCDP(callerCtx context.Context, backend ledgerbackend.LedgerBackend, tip *uint32) error { +func getLatestSeqInCDP(callerCtx context.Context, ds datastore.DataStore, tip *uint32) error { ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) defer cancelRunBackfill() - seq, err := backend.GetLatestLedgerSequence(ctx) + seq, err := datastore.FindLatestLedgerSequence(ctx, ds) if err != nil { return fmt.Errorf("could not get latest ledger sequence from datastore: %w", err) } From 32ee4ce5ccefdaf2e0d6fc922e654c5f8a94ecd8 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 17 Dec 2025 19:54:47 -0500 Subject: [PATCH 10/72] working implemetnation, not tested robustly or on testnet --- cmd/stellar-rpc/internal/daemon/daemon.go | 8 +- cmd/stellar-rpc/internal/db/ledger.go | 18 +++++ cmd/stellar-rpc/internal/ingest/backfill.go | 88 ++++++++++++++------- cmd/stellar-rpc/internal/methods/mocks.go | 5 ++ 4 files changed, 87 insertions(+), 32 deletions(-) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index 14a08104..7ea3dd5f 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -199,10 +199,6 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { cfg.HistoryRetentionWindow, cfg.NetworkPassphrase, ) - daemon.ingestService = createIngestService(cfg, logger, daemon, feewindows, historyArchive, rw) - daemon.preflightWorkerPool = createPreflightWorkerPool(cfg, logger, daemon) - daemon.jsonRPCHandler = createJSONRPCHandler(cfg, logger, daemon, feewindows) - if cfg.ServeLedgersFromDatastore { daemon.dataStore, daemon.dataStoreSchema = mustCreateDataStore(cfg, logger) } @@ -221,6 +217,10 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { } } + daemon.ingestService = createIngestService(cfg, logger, daemon, feewindows, historyArchive, rw) + 
daemon.preflightWorkerPool = createPreflightWorkerPool(cfg, logger, daemon) + daemon.jsonRPCHandler = createJSONRPCHandler(cfg, logger, daemon, feewindows) + daemon.setupHTTPServers(cfg) daemon.registerMetrics() diff --git a/cmd/stellar-rpc/internal/db/ledger.go b/cmd/stellar-rpc/internal/db/ledger.go index c28b1782..db3b38e4 100644 --- a/cmd/stellar-rpc/internal/db/ledger.go +++ b/cmd/stellar-rpc/internal/db/ledger.go @@ -34,6 +34,7 @@ type LedgerReaderTx interface { GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, bool, error) GetLedgerRange(ctx context.Context) (ledgerbucketwindow.LedgerRange, error) BatchGetLedgers(ctx context.Context, start uint32, end uint32) ([]LedgerMetadataChunk, error) + CountLedgersInRange(ctx context.Context, start uint32, end uint32) (uint32, error) Done() error } @@ -117,6 +118,23 @@ func (l ledgerReaderTx) GetLedger(ctx context.Context, sequence uint32) (xdr.Led return getLedgerFromDB(ctx, l.tx, sequence) } +func (l ledgerReaderTx) CountLedgersInRange(ctx context.Context, start uint32, end uint32) (uint32, error) { + sql := sq.Select("COUNT(*)").From(ledgerCloseMetaTableName). + Where(sq.And{ + sq.GtOrEq{"sequence": start}, + sq.LtOrEq{"sequence": end}, + }) + + var ct []uint32 + if err := l.tx.Select(ctx, &ct, sql); err != nil { + return 0, err + } + if len(ct) != 1 { + return 0, fmt.Errorf("expected 1 count result, got %d", len(ct)) + } + return ct[0], nil +} + func (l ledgerReaderTx) Done() error { return l.tx.Rollback() } diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index b22bb92f..a07727fc 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -20,6 +20,8 @@ const ( SevenDayOfLedgers = config.OneDayOfLedgers * 7 // Number of ledgers to read/write at a time during backfill ChunkSize uint32 = OneDayOfLedgers / 4 // 6 hours. 
Takes X minutes to process + + ledgerCloseMetaTableName = "ledger_close_meta" // from ledger.go ) // This function backfills the local database with n ledgers from the datastore @@ -37,10 +39,12 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D ) logger.Infof("Creating and setting LedgerBackend") - if err := makeBackend(&dsInfo); err != nil { + backend, err := makeBackend(dsInfo) + if err != nil { return fmt.Errorf("could not create ledger backend: %w", err) } - defer dsInfo.backend.Close() + dsInfo.backend = backend + defer backend.Close() // Determine what ledgers have been written to local DB ledgerRange, err := localDbReader.GetLedgerRange(ctx) @@ -96,10 +100,12 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D // Phase 3: verify no gaps in local DB after backfill logger.Infof("Forward backfill complete, starting post-backfill verification") + // Note final ledger we've backfilled to + endSeq := currentTipLedger if err = getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } - startSeq, endSeq := max(currentTipLedger-nBackfill+1, 1), currentTipLedger + startSeq := max(currentTipLedger-nBackfill+1, 1) if err = verifyDbGapless(ctx, localDbReader, startSeq, endSeq); err != nil { return fmt.Errorf("post-backfill verification failed: %w", err) } @@ -109,32 +115,42 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D } // Checks to ensure state of local DB is acceptable for backfilling -func verifyDbGapless(callerCtx context.Context, ledgerReader db.LedgerReader, minLedgerSeq uint32, maxLedgerSeq uint32) error { - ctx, cancelPrecheck := context.WithTimeout(callerCtx, 5*time.Second) +func verifyDbGapless(callerCtx context.Context, reader db.LedgerReader, minLedgerSeq uint32, maxLedgerSeq uint32) error { + ctx, cancelPrecheck := context.WithTimeout(callerCtx, 4*time.Minute) defer cancelPrecheck() - tx, err := ledgerReader.NewTx(ctx) + tx, err := reader.NewTx(ctx) if err != nil { return fmt.Errorf("db verify: failed to begin read transaction: %w", err) } defer tx.Done() - chunks, err := tx.BatchGetLedgers(ctx, minLedgerSeq, maxLedgerSeq) + ct, err := tx.CountLedgersInRange(ctx, minLedgerSeq, maxLedgerSeq) if err != nil { - return fmt.Errorf("db verify: could not batch get ledgers from DB: %w", err) + return fmt.Errorf("db verify: could not count ledgers in local DB: %w", err) } - expectedSeq := minLedgerSeq - for _, chunk := range chunks { - if seq := uint32(chunk.Header.Header.LedgerSeq); seq != expectedSeq { - return fmt.Errorf("db verify: gap detected in local DB: expected seq %d, got %d", expectedSeq, seq) - } - expectedSeq++ + if ct != maxLedgerSeq-minLedgerSeq+1 { + return fmt.Errorf("db verify: gap detected in local DB: expected %d ledgers, got %d ledgers", + maxLedgerSeq-minLedgerSeq+1, ct) } - if expectedSeq--; expectedSeq != maxLedgerSeq { - return fmt.Errorf("db verify: missing ledgers at tail: ended at %d, expected %d", expectedSeq, maxLedgerSeq) - } + // chunks, err := tx.BatchGetLedgers(ctx, minLedgerSeq, maxLedgerSeq) + // if err != nil { + // return fmt.Errorf("db verify: could not batch get ledgers from DB: %w", err) + // } + + // expectedSeq := minLedgerSeq + // for _, chunk := range chunks { + // if seq := uint32(chunk.Header.Header.LedgerSeq); seq != expectedSeq { + // return fmt.Errorf("db verify: gap detected in local DB: expected seq %d, got %d", expectedSeq, seq) + // } + // 
expectedSeq++ + // } + + // if expectedSeq--; expectedSeq != maxLedgerSeq { + // return fmt.Errorf("db verify: missing ledgers at tail: ended at %d, expected %d", expectedSeq, maxLedgerSeq) + // } return nil } @@ -143,14 +159,21 @@ func verifyDbGapless(callerCtx context.Context, ledgerReader db.LedgerReader, mi // Used to fill local DB backwards towards older ledgers // Returns the rightmost ledger func runBackfillBackwards(callerCtx context.Context, logger *supportlog.Entry, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { - for rChunkBound := rBound; rChunkBound >= lBound; rChunkBound = max(lBound, rChunkBound-ChunkSize) { + for rChunkBound := rBound; rChunkBound >= lBound; { if err := callerCtx.Err(); err != nil { return err } + // Create temporary backend for backwards-filling chunks + tempBackend, err := makeBackend(dsInfo) + if err != nil { + return fmt.Errorf("couldn't create backend: %w", err) + } + defer tempBackend.Close() + lChunkBound := max(lBound, rChunkBound-ChunkSize+1) - logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]\n", lChunkBound, rChunkBound) + logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) backfillRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) - if err := dsInfo.backend.PrepareRange(callerCtx, backfillRange); err != nil { + if err := tempBackend.PrepareRange(callerCtx, backfillRange); err != nil { return fmt.Errorf("couldn't prepare range [%d, %d]: %w", lChunkBound, rChunkBound, err) } @@ -160,10 +183,15 @@ func runBackfillBackwards(callerCtx context.Context, logger *supportlog.Entry, l } defer tx.Rollback() - if err := fillChunk(callerCtx, dsInfo, tx, lChunkBound, rChunkBound); err != nil { + if err := fillChunk(callerCtx, dsInfo, tx, &tempBackend, lChunkBound, rChunkBound); err != nil { return fmt.Errorf("couldn't fill chunk [%d, %d]: %w", lChunkBound, rChunkBound, err) } logger.Infof("Backwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) + + if lChunkBound == lBound { + break + } + rChunkBound = lChunkBound - 1 } return nil } @@ -187,7 +215,7 @@ func runBackfillForwards(callerCtx context.Context, logger *supportlog.Entry, le return fmt.Errorf("couldn't create local db write tx: %w", err) } - if err := fillChunk(callerCtx, dsInfo, tx, lChunkBound, rChunkBound); err != nil { + if err := fillChunk(callerCtx, dsInfo, tx, nil, lChunkBound, rChunkBound); err != nil { return fmt.Errorf("couldn't fill chunk [%d, %d]: %w", lChunkBound, rChunkBound, err) } logger.Infof("Forwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) @@ -196,13 +224,17 @@ func runBackfillForwards(callerCtx context.Context, logger *supportlog.Entry, le return nil } -func fillChunk(callerCtx context.Context, dsInfo DatastoreInfo, tx db.WriteTx, left uint32, right uint32) error { +func fillChunk(callerCtx context.Context, dsInfo DatastoreInfo, tx db.WriteTx, tempBackend *ledgerbackend.LedgerBackend, left uint32, right uint32) error { var ledger xdr.LedgerCloseMeta var err error + + if tempBackend == nil { + tempBackend = &dsInfo.backend + } processed := false for seq := left; seq <= right; seq++ { // Fetch ledger from backend - ledger, err = dsInfo.backend.GetLedger(callerCtx, seq) + ledger, err = (*tempBackend).GetLedger(callerCtx, seq) if err != nil { _ = tx.Rollback() return fmt.Errorf("couldn't get ledger %d from backend: %w", seq, err) @@ -224,7 +256,7 @@ func fillChunk(callerCtx context.Context, dsInfo DatastoreInfo, tx db.WriteTx, l // Creates a 
buffered storage backend for the given datastore // Sets it in the DatastoreInfo struct -func makeBackend(dsInfo *DatastoreInfo) error { +func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { backend, err := ledgerbackend.NewBufferedStorageBackend( ledgerbackend.BufferedStorageBackendConfig{ BufferSize: 1024, @@ -236,10 +268,10 @@ func makeBackend(dsInfo *DatastoreInfo) error { dsInfo.Schema, ) if err != nil { - return err + return nil, err } - dsInfo.backend = backend - return nil + + return backend, nil } // Gets the latest ledger number stored in the cloud Datastore/datalake diff --git a/cmd/stellar-rpc/internal/methods/mocks.go b/cmd/stellar-rpc/internal/methods/mocks.go index c47338cb..c36ea5d0 100644 --- a/cmd/stellar-rpc/internal/methods/mocks.go +++ b/cmd/stellar-rpc/internal/methods/mocks.go @@ -69,6 +69,11 @@ func (m *MockLedgerReaderTx) BatchGetLedgers(ctx context.Context, start, end uin return args.Get(0).([]db.LedgerMetadataChunk), args.Error(1) //nolint:forcetypeassert } +func (m *MockLedgerReaderTx) CountLedgersInRange(ctx context.Context, start, end uint32) (uint32, error) { + args := m.Called(ctx, start, end) + return args.Get(0).(uint32), args.Error(1) //nolint:forcetypeassert +} + func (m *MockLedgerReaderTx) GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, bool, error) { args := m.Called(ctx, sequence) return args.Get(0).(xdr.LedgerCloseMeta), args.Bool(1), args.Error(2) //nolint:forcetypeassert From 749da32605e8da33d3286cd0252de11079401d47 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 17 Dec 2025 21:03:30 -0500 Subject: [PATCH 11/72] fixed bug with backwards to forwards transition if starting from empty DB --- cmd/stellar-rpc/internal/ingest/backfill.go | 38 ++++++--------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index a07727fc..3e1e8975 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -20,8 +20,6 @@ const ( SevenDayOfLedgers = config.OneDayOfLedgers * 7 // Number of ledgers to read/write at a time during backfill ChunkSize uint32 = OneDayOfLedgers / 4 // 6 hours. 
Takes X minutes to process - - ledgerCloseMetaTableName = "ledger_close_meta" // from ledger.go ) // This function backfills the local database with n ledgers from the datastore @@ -47,6 +45,7 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D defer backend.Close() // Determine what ledgers have been written to local DB + // Note GetLedgerRange assumes lexicographical ordering of ledger files in datastore ledgerRange, err := localDbReader.GetLedgerRange(ctx) if err != nil && err != db.ErrEmptyDB { return fmt.Errorf("error getting ledger range from local DB: %w", err) @@ -55,7 +54,7 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D } maxWrittenLedger, minWrittenLedger := ledgerRange.LastLedger.Sequence, max(ledgerRange.FirstLedger.Sequence, 1) - // Phase 0: precheck to ensure no gaps in local DB + // Phase 1: precheck to ensure no gaps in local DB if !dbIsEmpty && (maxWrittenLedger >= minWrittenLedger) { logger.Infof("Starting precheck for backfilling the database at %s, phase 1 of 4", localDbPath) if err = verifyDbGapless(ctx, localDbReader, minWrittenLedger, maxWrittenLedger); err != nil { @@ -66,7 +65,7 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D } logger.Infof("Precheck passed! Starting backfill process, phase 2 of 4") - // Phase 1: backfill backwards towards oldest ledger to put in DB + // Phase 2: backfill backwards towards oldest ledger to put in DB if err := getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } @@ -79,26 +78,26 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D } else { rBound = minWrittenLedger - 1 } - // min(minWrittenLedger, currentTipLedger)-1 if err = runBackfillBackwards(ctx, logger, localDbRW, dsInfo, lBound, rBound); err != nil { return fmt.Errorf("backfill backwards failed: %w", err) } - // Phase 2: backfill forwards towards latest ledger to put in DB + // Phase 3: backfill forwards towards latest ledger to put in DB logger.Infof("Backward backfill of old ledgers complete! 
Starting forward backfill to current tip, phase 3 of 4") - if err = getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { - return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) - } if dbIsEmpty { - lBound = max(currentTipLedger-nBackfill+1, 1) + lBound = rBound + 1 } else { + // If the DB wasn't empty initially, the backwards backfill filled up to minWrittenLedger-1 < maxWrittenLedger lBound = maxWrittenLedger + 1 } + if err = getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { + return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) + } if err = runBackfillForwards(ctx, logger, localDbRW, dsInfo, lBound, currentTipLedger); err != nil { return fmt.Errorf("backfill forwards failed: %w", err) } - // Phase 3: verify no gaps in local DB after backfill + // Phase 4: verify no gaps in local DB after backfill logger.Infof("Forward backfill complete, starting post-backfill verification") // Note final ledger we've backfilled to endSeq := currentTipLedger @@ -135,23 +134,6 @@ func verifyDbGapless(callerCtx context.Context, reader db.LedgerReader, minLedge maxLedgerSeq-minLedgerSeq+1, ct) } - // chunks, err := tx.BatchGetLedgers(ctx, minLedgerSeq, maxLedgerSeq) - // if err != nil { - // return fmt.Errorf("db verify: could not batch get ledgers from DB: %w", err) - // } - - // expectedSeq := minLedgerSeq - // for _, chunk := range chunks { - // if seq := uint32(chunk.Header.Header.LedgerSeq); seq != expectedSeq { - // return fmt.Errorf("db verify: gap detected in local DB: expected seq %d, got %d", expectedSeq, seq) - // } - // expectedSeq++ - // } - - // if expectedSeq--; expectedSeq != maxLedgerSeq { - // return fmt.Errorf("db verify: missing ledgers at tail: ended at %d, expected %d", expectedSeq, maxLedgerSeq) - // } - return nil } From 0214ae2d7b80a7495427c7b3dca60156803464ef Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 18 Dec 2025 14:04:02 -0500 Subject: [PATCH 12/72] patched bug in empty DB case --- cmd/stellar-rpc/internal/ingest/backfill.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 3e1e8975..6d2e39b9 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -35,7 +35,7 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D localDbPath string = cfg.SQLiteDBPath localDbReader db.LedgerReader = db.NewLedgerReader(localDbConn) ) - + logger.Infof("Starting initialization and precheck for backfilling the database at %s, phase 1 of 4", localDbPath) logger.Infof("Creating and setting LedgerBackend") backend, err := makeBackend(dsInfo) if err != nil { @@ -63,7 +63,7 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D } else { logger.Infof("Local DB is empty, skipping precheck") } - logger.Infof("Precheck passed! Starting backfill process, phase 2 of 4") + logger.Infof("Precheck passed! 
Starting backfill backwards, phase 2 of 4") // Phase 2: backfill backwards towards oldest ledger to put in DB if err := getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { From 4bf5abd96a82fbcd34b2af6b0b341b43eee50016 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 18 Dec 2025 15:24:55 -0500 Subject: [PATCH 13/72] large refactoring --- cmd/stellar-rpc/internal/ingest/backfill.go | 118 ++++++++++---------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 6d2e39b9..b2a2de5b 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -25,28 +25,31 @@ const ( // This function backfills the local database with n ledgers from the datastore // It is called by daemon.go if cfg.Backfill is true func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.DB, localDbRW db.ReadWriter, dsInfo DatastoreInfo) error { - logger.Infof("Beginning backfill process") + logger.Infof("Starting initialization and precheck for backfilling the database at %s (phase 1 of 4)", cfg.SQLiteDBPath) var ( ctx context.Context = context.Background() currentTipLedger uint32 dbIsEmpty bool = false nBackfill uint32 = cfg.HistoryRetentionWindow - localDbPath string = cfg.SQLiteDBPath localDbReader db.LedgerReader = db.NewLedgerReader(localDbConn) + metaInfo backfillMeta = backfillMeta{ + ctx: ctx, + logger: logger, + rw: localDbRW, + dsInfo: dsInfo, + } ) - logger.Infof("Starting initialization and precheck for backfilling the database at %s, phase 1 of 4", localDbPath) - logger.Infof("Creating and setting LedgerBackend") backend, err := makeBackend(dsInfo) if err != nil { return fmt.Errorf("could not create ledger backend: %w", err) } - dsInfo.backend = backend - defer backend.Close() + metaInfo.dsInfo.backend = backend + defer metaInfo.dsInfo.backend.Close() // Determine what ledgers have been written to local DB // Note GetLedgerRange assumes lexicographical ordering of ledger files in datastore - ledgerRange, err := localDbReader.GetLedgerRange(ctx) + ledgerRange, err := localDbReader.GetLedgerRange(metaInfo.ctx) if err != nil && err != db.ErrEmptyDB { return fmt.Errorf("error getting ledger range from local DB: %w", err) } else if err == db.ErrEmptyDB { @@ -56,20 +59,19 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D // Phase 1: precheck to ensure no gaps in local DB if !dbIsEmpty && (maxWrittenLedger >= minWrittenLedger) { - logger.Infof("Starting precheck for backfilling the database at %s, phase 1 of 4", localDbPath) - if err = verifyDbGapless(ctx, localDbReader, minWrittenLedger, maxWrittenLedger); err != nil { + if err = verifyDbGapless(metaInfo.ctx, localDbReader, minWrittenLedger, maxWrittenLedger); err != nil { return fmt.Errorf("backfill precheck failed: %w", err) } } else { - logger.Infof("Local DB is empty, skipping precheck") + metaInfo.logger.Infof("Local DB is empty, skipping precheck") } - logger.Infof("Precheck passed! Starting backfill backwards, phase 2 of 4") + metaInfo.logger.Infof("Precheck passed! 
Starting backfill backwards phase (phase 2 of 4)") // Phase 2: backfill backwards towards oldest ledger to put in DB - if err := getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { + if err := getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } - logger.Debugf("Current tip ledger in cloud datastore is %d", currentTipLedger) + metaInfo.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) lBound := max(currentTipLedger-nBackfill+1, 1) var rBound uint32 @@ -78,37 +80,39 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D } else { rBound = minWrittenLedger - 1 } - if err = runBackfillBackwards(ctx, logger, localDbRW, dsInfo, lBound, rBound); err != nil { + metaInfo.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", rBound, lBound) + if err = runBackfillBackwards(metaInfo, lBound, rBound); err != nil { return fmt.Errorf("backfill backwards failed: %w", err) } // Phase 3: backfill forwards towards latest ledger to put in DB - logger.Infof("Backward backfill of old ledgers complete! Starting forward backfill to current tip, phase 3 of 4") + metaInfo.logger.Infof("Backward backfill of old ledgers complete! Starting forward backfill (phase 3 of 4)") if dbIsEmpty { lBound = rBound + 1 } else { // If the DB wasn't empty initially, the backwards backfill filled up to minWrittenLedger-1 < maxWrittenLedger lBound = maxWrittenLedger + 1 } - if err = getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { + if err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } - if err = runBackfillForwards(ctx, logger, localDbRW, dsInfo, lBound, currentTipLedger); err != nil { + metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBound, currentTipLedger-1) + if err = runBackfillForwards(metaInfo, lBound, currentTipLedger-1); err != nil { return fmt.Errorf("backfill forwards failed: %w", err) } // Phase 4: verify no gaps in local DB after backfill - logger.Infof("Forward backfill complete, starting post-backfill verification") + metaInfo.logger.Infof("Forward backfill complete, starting post-backfill verification") // Note final ledger we've backfilled to - endSeq := currentTipLedger - if err = getLatestSeqInCDP(ctx, dsInfo.Ds, ¤tTipLedger); err != nil { + endSeq := currentTipLedger - 1 + if err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds, ¤tTipLedger); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } startSeq := max(currentTipLedger-nBackfill+1, 1) - if err = verifyDbGapless(ctx, localDbReader, startSeq, endSeq); err != nil { + if err = verifyDbGapless(metaInfo.ctx, localDbReader, startSeq, endSeq); err != nil { return fmt.Errorf("post-backfill verification failed: %w", err) } - logger.Infof("Backfill process complete") + metaInfo.logger.Infof("Backfill process complete") return nil } @@ -139,36 +143,25 @@ func verifyDbGapless(callerCtx context.Context, reader db.LedgerReader, minLedge // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers -// Returns the rightmost ledger -func runBackfillBackwards(callerCtx context.Context, logger *supportlog.Entry, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { +func 
runBackfillBackwards(metaInfo backfillMeta, lBound uint32, rBound uint32) error { for rChunkBound := rBound; rChunkBound >= lBound; { - if err := callerCtx.Err(); err != nil { + if err := metaInfo.ctx.Err(); err != nil { return err } // Create temporary backend for backwards-filling chunks - tempBackend, err := makeBackend(dsInfo) + tempBackend, err := makeBackend(metaInfo.dsInfo) if err != nil { return fmt.Errorf("couldn't create backend: %w", err) } defer tempBackend.Close() lChunkBound := max(lBound, rChunkBound-ChunkSize+1) - logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - backfillRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) - if err := tempBackend.PrepareRange(callerCtx, backfillRange); err != nil { - return fmt.Errorf("couldn't prepare range [%d, %d]: %w", lChunkBound, rChunkBound, err) - } - - tx, err := ledgerRW.NewTx(callerCtx) - if err != nil { - return fmt.Errorf("couldn't create local db write tx: %w", err) - } - defer tx.Rollback() + metaInfo.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := fillChunk(callerCtx, dsInfo, tx, &tempBackend, lChunkBound, rChunkBound); err != nil { + if err := fillChunk(metaInfo, &tempBackend, lChunkBound, rChunkBound); err != nil { return fmt.Errorf("couldn't fill chunk [%d, %d]: %w", lChunkBound, rChunkBound, err) } - logger.Infof("Backwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) + metaInfo.logger.Infof("Backwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) if lChunkBound == lBound { break @@ -180,49 +173,48 @@ func runBackfillBackwards(callerCtx context.Context, logger *supportlog.Entry, l // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards the current ledger tip -func runBackfillForwards(callerCtx context.Context, logger *supportlog.Entry, ledgerRW db.ReadWriter, dsInfo DatastoreInfo, lBound uint32, rBound uint32) error { +func runBackfillForwards(metaInfo backfillMeta, lBound uint32, rBound uint32) error { for lChunkBound := lBound; lChunkBound <= rBound; lChunkBound += ChunkSize { - if err := callerCtx.Err(); err != nil { + if err := metaInfo.ctx.Err(); err != nil { return err } rChunkBound := min(rBound, lChunkBound+ChunkSize-1) - logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - backfillRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) - if err := dsInfo.backend.PrepareRange(callerCtx, backfillRange); err != nil { - return fmt.Errorf("couldn't prepare range [%d, %d]: %w", lChunkBound, rChunkBound, err) - } - - tx, err := ledgerRW.NewTx(callerCtx) - if err != nil { - return fmt.Errorf("couldn't create local db write tx: %w", err) - } + metaInfo.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := fillChunk(callerCtx, dsInfo, tx, nil, lChunkBound, rChunkBound); err != nil { + if err := fillChunk(metaInfo, &metaInfo.dsInfo.backend, lChunkBound, rChunkBound); err != nil { return fmt.Errorf("couldn't fill chunk [%d, %d]: %w", lChunkBound, rChunkBound, err) } - logger.Infof("Forwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) + metaInfo.logger.Infof("Forwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) } return nil } -func fillChunk(callerCtx context.Context, dsInfo DatastoreInfo, tx db.WriteTx, tempBackend *ledgerbackend.LedgerBackend, left uint32, right uint32) error { 
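+// In this revision fillChunk owns the whole chunk lifecycle: it opens the local
+// db write tx, prepares the bounded range on the given backend, then copies and
+// commits the ledgers in ascending sequence order.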
+// Fills a chunk of ledgers [left, right] from the given backend into the local DB +// Fills from left to right (i.e. sequence number ascending) +func fillChunk(metaInfo backfillMeta, tempBackend *ledgerbackend.LedgerBackend, left uint32, right uint32) error { var ledger xdr.LedgerCloseMeta var err error - if tempBackend == nil { - tempBackend = &dsInfo.backend + tx, err := metaInfo.rw.NewTx(metaInfo.ctx) + if err != nil { + return fmt.Errorf("couldn't create local db write tx: %w", err) + } + defer tx.Rollback() + + backfillRange := ledgerbackend.BoundedRange(left, right) + if err := (*tempBackend).PrepareRange(metaInfo.ctx, backfillRange); err != nil { + return fmt.Errorf("couldn't prepare range [%d, %d]: %w", left, right, err) } + processed := false for seq := left; seq <= right; seq++ { // Fetch ledger from backend - ledger, err = (*tempBackend).GetLedger(callerCtx, seq) + ledger, err = (*tempBackend).GetLedger(metaInfo.ctx, seq) if err != nil { - _ = tx.Rollback() return fmt.Errorf("couldn't get ledger %d from backend: %w", seq, err) } if err = tx.LedgerWriter().InsertLedger(ledger); err != nil { - _ = tx.Rollback() return fmt.Errorf("couldn't write ledger %d to local db: %w", seq, err) } processed = true @@ -276,3 +268,11 @@ type DatastoreInfo struct { Config datastore.DataStoreConfig backend ledgerbackend.LedgerBackend } + +// This struct holds the metadata/constructs necessary for most backfilling operations +type backfillMeta struct { + ctx context.Context + logger *supportlog.Entry + rw db.ReadWriter + dsInfo DatastoreInfo +} From 885c22081a1e0237a6127f15fc73f46766e25813 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 18 Dec 2025 15:33:37 -0500 Subject: [PATCH 14/72] minor refactoring/bud ID --- cmd/stellar-rpc/internal/ingest/backfill.go | 29 +++++++++------------ 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index b2a2de5b..23443097 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -48,7 +48,7 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D defer metaInfo.dsInfo.backend.Close() // Determine what ledgers have been written to local DB - // Note GetLedgerRange assumes lexicographical ordering of ledger files in datastore + // POSSIBLE BUG SOURCE: GetLedgerRange assumes lexicographical ordering of ledger files in datastore ledgerRange, err := localDbReader.GetLedgerRange(metaInfo.ctx) if err != nil && err != db.ErrEmptyDB { return fmt.Errorf("error getting ledger range from local DB: %w", err) @@ -72,32 +72,29 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } metaInfo.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) - lBound := max(currentTipLedger-nBackfill+1, 1) - var rBound uint32 + var lBoundBackwards, rBoundBackwards uint32 + var lBoundForwards, rBoundForwards uint32 + lBoundBackwards = max(currentTipLedger-nBackfill+1, 1) if dbIsEmpty { - rBound = currentTipLedger + rBoundBackwards = currentTipLedger + lBoundForwards = rBoundBackwards + 1 } else { - rBound = minWrittenLedger - 1 + rBoundBackwards = minWrittenLedger - 1 + lBoundForwards = maxWrittenLedger + 1 } - metaInfo.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", rBound, lBound) - if err = runBackfillBackwards(metaInfo, lBound, 
rBound); err != nil { + metaInfo.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", lBoundBackwards, rBoundBackwards) + if err = runBackfillBackwards(metaInfo, lBoundBackwards, rBoundBackwards); err != nil { return fmt.Errorf("backfill backwards failed: %w", err) } // Phase 3: backfill forwards towards latest ledger to put in DB metaInfo.logger.Infof("Backward backfill of old ledgers complete! Starting forward backfill (phase 3 of 4)") - if dbIsEmpty { - lBound = rBound + 1 - } else { - // If the DB wasn't empty initially, the backwards backfill filled up to minWrittenLedger-1 < maxWrittenLedger - lBound = maxWrittenLedger + 1 - } - if err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds, ¤tTipLedger); err != nil { + if err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds, &rBoundForwards); err != nil { return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) } - metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBound, currentTipLedger-1) - if err = runBackfillForwards(metaInfo, lBound, currentTipLedger-1); err != nil { + metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) + if err = runBackfillForwards(metaInfo, lBoundForwards, rBoundForwards); err != nil { return fmt.Errorf("backfill forwards failed: %w", err) } From cf02b2e49a5956fbb375eb2b4b322644e2d49d50 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 18 Dec 2025 21:53:51 -0500 Subject: [PATCH 15/72] changed to errors.wrap, cleaned up code --- cmd/stellar-rpc/internal/ingest/backfill.go | 146 ++++++++++---------- 1 file changed, 76 insertions(+), 70 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 23443097..2979e646 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -5,6 +5,8 @@ import ( "fmt" "time" + "github.com/pkg/errors" + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/support/datastore" supportlog "github.com/stellar/go-stellar-sdk/support/log" @@ -19,11 +21,12 @@ const ( OneDayOfLedgers = config.OneDayOfLedgers SevenDayOfLedgers = config.OneDayOfLedgers * 7 // Number of ledgers to read/write at a time during backfill - ChunkSize uint32 = OneDayOfLedgers / 4 // 6 hours. Takes X minutes to process + ChunkSize uint32 = OneDayOfLedgers / 4 // 6 hours. 
Takes [TODO] minutes to process ) -// This function backfills the local database with n ledgers from the datastore -// It is called by daemon.go if cfg.Backfill is true +// This function backfills the local database with ledgers from the datastore +// It is called by daemon.go if cfg.Backfill and cfg.ServeLedgersFromDatastore are true +// It requires that no sequence number gaps exist in the local DB prior to backfilling func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.DB, localDbRW db.ReadWriter, dsInfo DatastoreInfo) error { logger.Infof("Starting initialization and precheck for backfilling the database at %s (phase 1 of 4)", cfg.SQLiteDBPath) var ( @@ -42,37 +45,37 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D ) backend, err := makeBackend(dsInfo) if err != nil { - return fmt.Errorf("could not create ledger backend: %w", err) + return errors.Wrap(err, "could not create ledger backend") } metaInfo.dsInfo.backend = backend defer metaInfo.dsInfo.backend.Close() // Determine what ledgers have been written to local DB - // POSSIBLE BUG SOURCE: GetLedgerRange assumes lexicographical ordering of ledger files in datastore ledgerRange, err := localDbReader.GetLedgerRange(metaInfo.ctx) - if err != nil && err != db.ErrEmptyDB { - return fmt.Errorf("error getting ledger range from local DB: %w", err) - } else if err == db.ErrEmptyDB { + if errors.Is(err, db.ErrEmptyDB) { dbIsEmpty = true + } else if err != nil { + return errors.Wrap(err, "could not get ledger range from local DB") } - maxWrittenLedger, minWrittenLedger := ledgerRange.LastLedger.Sequence, max(ledgerRange.FirstLedger.Sequence, 1) + maxWrittenLedger, minWrittenLedger := ledgerRange.LastLedger.Sequence, ledgerRange.FirstLedger.Sequence // Phase 1: precheck to ensure no gaps in local DB - if !dbIsEmpty && (maxWrittenLedger >= minWrittenLedger) { - if err = verifyDbGapless(metaInfo.ctx, localDbReader, minWrittenLedger, maxWrittenLedger); err != nil { - return fmt.Errorf("backfill precheck failed: %w", err) + if !dbIsEmpty { + if err = verifyDbGapless(metaInfo.ctx, metaInfo.logger, localDbReader, minWrittenLedger, maxWrittenLedger); err != nil { + return errors.Wrap(err, "backfill precheck failed") } } else { metaInfo.logger.Infof("Local DB is empty, skipping precheck") } metaInfo.logger.Infof("Precheck passed! 
Starting backfill backwards phase (phase 2 of 4)") - // Phase 2: backfill backwards towards oldest ledger to put in DB - if err := getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds, ¤tTipLedger); err != nil { - return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) + // Phase 2: backfill backwards from minimum written ledger towards oldest ledger in retention window + if currentTipLedger, err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds); err != nil { + return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } metaInfo.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) + // Bounds for ledgers to be written to local DB in backwards and forwards phases var lBoundBackwards, rBoundBackwards uint32 var lBoundForwards, rBoundForwards uint32 lBoundBackwards = max(currentTipLedger-nBackfill+1, 1) @@ -83,56 +86,65 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D rBoundBackwards = minWrittenLedger - 1 lBoundForwards = maxWrittenLedger + 1 } - metaInfo.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", lBoundBackwards, rBoundBackwards) - if err = runBackfillBackwards(metaInfo, lBoundBackwards, rBoundBackwards); err != nil { - return fmt.Errorf("backfill backwards failed: %w", err) + if lBoundBackwards < rBoundBackwards { + metaInfo.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", + lBoundBackwards, rBoundBackwards) + if err = runBackfillBackwards(metaInfo, lBoundBackwards, rBoundBackwards); err != nil { + return errors.Wrap(err, "backfill backwards failed") + } + } else { + metaInfo.logger.Infof("No backwards backfill needed, local DB tail already covers retention window") } - // Phase 3: backfill forwards towards latest ledger to put in DB + // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB metaInfo.logger.Infof("Backward backfill of old ledgers complete! 
Starting forward backfill (phase 3 of 4)") - if err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds, &rBoundForwards); err != nil { - return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) + if rBoundForwards, err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds); err != nil { + return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) if err = runBackfillForwards(metaInfo, lBoundForwards, rBoundForwards); err != nil { - return fmt.Errorf("backfill forwards failed: %w", err) + return errors.Wrap(err, "backfill forwards failed") } // Phase 4: verify no gaps in local DB after backfill metaInfo.logger.Infof("Forward backfill complete, starting post-backfill verification") // Note final ledger we've backfilled to - endSeq := currentTipLedger - 1 - if err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds, ¤tTipLedger); err != nil { - return fmt.Errorf("could not get latest ledger number from cloud datastore: %w", err) + endSeq := rBoundForwards + if currentTipLedger, err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds); err != nil { + return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } startSeq := max(currentTipLedger-nBackfill+1, 1) - if err = verifyDbGapless(metaInfo.ctx, localDbReader, startSeq, endSeq); err != nil { - return fmt.Errorf("post-backfill verification failed: %w", err) + if err = verifyDbGapless(metaInfo.ctx, metaInfo.logger, localDbReader, startSeq, endSeq); err != nil { + return errors.Wrap(err, "post-backfill verification failed") } - metaInfo.logger.Infof("Backfill process complete") + metaInfo.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", startSeq, endSeq) return nil } // Checks to ensure state of local DB is acceptable for backfilling -func verifyDbGapless(callerCtx context.Context, reader db.LedgerReader, minLedgerSeq uint32, maxLedgerSeq uint32) error { +func verifyDbGapless(callerCtx context.Context, logger *supportlog.Entry, reader db.LedgerReader, minLedgerSeq uint32, maxLedgerSeq uint32) error { ctx, cancelPrecheck := context.WithTimeout(callerCtx, 4*time.Minute) defer cancelPrecheck() tx, err := reader.NewTx(ctx) if err != nil { - return fmt.Errorf("db verify: failed to begin read transaction: %w", err) + return errors.Wrap(err, "db verify: failed to begin read transaction") } - defer tx.Done() + defer func() { + if rollbackErr := tx.Done(); rollbackErr != nil { + logger.Warnf("couldn't rollback in verifyDbGapless: %v", rollbackErr) + } + }() - ct, err := tx.CountLedgersInRange(ctx, minLedgerSeq, maxLedgerSeq) + count, err := tx.CountLedgersInRange(ctx, minLedgerSeq, maxLedgerSeq) if err != nil { - return fmt.Errorf("db verify: could not count ledgers in local DB: %w", err) + return errors.Wrap(err, "db verify: could not count ledgers in local DB") } - if ct != maxLedgerSeq-minLedgerSeq+1 { + if count != maxLedgerSeq-minLedgerSeq+1 { return fmt.Errorf("db verify: gap detected in local DB: expected %d ledgers, got %d ledgers", - maxLedgerSeq-minLedgerSeq+1, ct) + maxLedgerSeq-minLedgerSeq+1, count) } return nil @@ -148,17 +160,18 @@ func runBackfillBackwards(metaInfo backfillMeta, lBound uint32, rBound uint32) e // Create temporary backend for backwards-filling chunks tempBackend, err := makeBackend(metaInfo.dsInfo) if err != nil { - return fmt.Errorf("couldn't create backend: %w", err) + return errors.Wrap(err, "couldn't create backend") } defer 
tempBackend.Close() lChunkBound := max(lBound, rChunkBound-ChunkSize+1) metaInfo.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := fillChunk(metaInfo, &tempBackend, lChunkBound, rChunkBound); err != nil { - return fmt.Errorf("couldn't fill chunk [%d, %d]: %w", lChunkBound, rChunkBound, err) + if err := fillChunk(metaInfo, tempBackend, lChunkBound, rChunkBound); err != nil { + return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } - metaInfo.logger.Infof("Backwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) + metaInfo.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", + lChunkBound, rChunkBound, 100*(rChunkBound-lBound)/(rBound-lBound)) if lChunkBound == lBound { break @@ -178,55 +191,53 @@ func runBackfillForwards(metaInfo backfillMeta, lBound uint32, rBound uint32) er rChunkBound := min(rBound, lChunkBound+ChunkSize-1) metaInfo.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := fillChunk(metaInfo, &metaInfo.dsInfo.backend, lChunkBound, rChunkBound); err != nil { - return fmt.Errorf("couldn't fill chunk [%d, %d]: %w", lChunkBound, rChunkBound, err) + if err := fillChunk(metaInfo, metaInfo.dsInfo.backend, lChunkBound, rChunkBound); err != nil { + return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } - metaInfo.logger.Infof("Forwards backfill: committed ledgers [%d, %d]", lChunkBound, rChunkBound) - + metaInfo.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", + lChunkBound, rChunkBound, 100*(rChunkBound-lBound)/(rBound-lBound)) } return nil } // Fills a chunk of ledgers [left, right] from the given backend into the local DB // Fills from left to right (i.e. 
sequence number ascending) -func fillChunk(metaInfo backfillMeta, tempBackend *ledgerbackend.LedgerBackend, left uint32, right uint32) error { +func fillChunk(metaInfo backfillMeta, readBackend ledgerbackend.LedgerBackend, left uint32, right uint32) error { var ledger xdr.LedgerCloseMeta - var err error tx, err := metaInfo.rw.NewTx(metaInfo.ctx) if err != nil { - return fmt.Errorf("couldn't create local db write tx: %w", err) + return errors.Wrap(err, "couldn't create local db write tx") } - defer tx.Rollback() + // Log rollback errors on failure with where they failed + defer func() { + if rollbackErr := tx.Rollback(); rollbackErr != nil { + metaInfo.logger.Warnf("couldn't rollback in fillChunk: %v", rollbackErr) + } + }() backfillRange := ledgerbackend.BoundedRange(left, right) - if err := (*tempBackend).PrepareRange(metaInfo.ctx, backfillRange); err != nil { - return fmt.Errorf("couldn't prepare range [%d, %d]: %w", left, right, err) + if err := readBackend.PrepareRange(metaInfo.ctx, backfillRange); err != nil { + return errors.Wrapf(err, "couldn't prepare range [%d, %d]", left, right) } - processed := false for seq := left; seq <= right; seq++ { - // Fetch ledger from backend - ledger, err = (*tempBackend).GetLedger(metaInfo.ctx, seq) + // Fetch ledger from backend, commit to local DB + ledger, err = readBackend.GetLedger(metaInfo.ctx, seq) if err != nil { - return fmt.Errorf("couldn't get ledger %d from backend: %w", seq, err) + return errors.Wrapf(err, "couldn't get ledger %d from backend", seq) } if err = tx.LedgerWriter().InsertLedger(ledger); err != nil { - return fmt.Errorf("couldn't write ledger %d to local db: %w", seq, err) + return errors.Wrapf(err, "couldn't write ledger %d to local db", seq) } - processed = true } - if processed { - if err := tx.Commit(ledger, nil); err != nil { - _ = tx.Rollback() - return fmt.Errorf("couldn't commit range [%d, %d]: %w", left, right, err) - } + if err := tx.Commit(ledger, nil); err != nil { + return errors.Wrapf(err, "couldn't commit range [%d, %d]", left, right) } return nil } // Creates a buffered storage backend for the given datastore -// Sets it in the DatastoreInfo struct func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { backend, err := ledgerbackend.NewBufferedStorageBackend( ledgerbackend.BufferedStorageBackendConfig{ @@ -238,25 +249,20 @@ func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { dsInfo.Ds, dsInfo.Schema, ) - if err != nil { - return nil, err - } - - return backend, nil + return backend, err } // Gets the latest ledger number stored in the cloud Datastore/datalake // Stores it in tip pointer -func getLatestSeqInCDP(callerCtx context.Context, ds datastore.DataStore, tip *uint32) error { +func getLatestSeqInCDP(callerCtx context.Context, ds datastore.DataStore) (uint32, error) { ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) defer cancelRunBackfill() seq, err := datastore.FindLatestLedgerSequence(ctx, ds) if err != nil { - return fmt.Errorf("could not get latest ledger sequence from datastore: %w", err) + return 0, errors.Wrap(err, "could not get latest ledger sequence from datastore") } - *tip = seq - return nil + return seq, nil } type DatastoreInfo struct { From 1499ef27b8b184c8675920729c9449a741265a7a Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 19 Dec 2025 02:47:35 -0500 Subject: [PATCH 16/72] major refactoring; design made pointer-receiver oriented --- cmd/stellar-rpc/internal/daemon/daemon.go | 13 ++-- 
cmd/stellar-rpc/internal/ingest/backfill.go | 82 +++++++++++---------- 2 files changed, 52 insertions(+), 43 deletions(-) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index 7ea3dd5f..c763710c 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -203,16 +203,19 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { daemon.dataStore, daemon.dataStoreSchema = mustCreateDataStore(cfg, logger) } if cfg.Backfill { - err := ingest.RunBackfill(cfg, - logger.WithField("subservice", "backfill"), - daemon.db, + backfillMeta := ingest.NewBackfillMeta( + context.Background(), + logger, rw, + db.NewLedgerReader(daemon.db), ingest.DatastoreInfo{ Ds: daemon.dataStore, Schema: daemon.dataStoreSchema, Config: cfg.DataStoreConfig, - }) - if err != nil { + }, + ) + + if err := backfillMeta.RunBackfill(cfg); err != nil { logger.WithError(err).Fatal("failed to backfill ledgers") } } diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 2979e646..a002d05e 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -24,44 +24,48 @@ const ( ChunkSize uint32 = OneDayOfLedgers / 4 // 6 hours. Takes [TODO] minutes to process ) +func NewBackfillMeta(ctx context.Context, logger *supportlog.Entry, rw db.ReadWriter, reader db.LedgerReader, dsInfo DatastoreInfo) BackfillMeta { + return BackfillMeta{ + ctx: ctx, + logger: logger, + rw: rw, + reader: reader, + dsInfo: dsInfo, + } +} + // This function backfills the local database with ledgers from the datastore -// It is called by daemon.go if cfg.Backfill and cfg.ServeLedgersFromDatastore are true -// It requires that no sequence number gaps exist in the local DB prior to backfilling -func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.DB, localDbRW db.ReadWriter, dsInfo DatastoreInfo) error { - logger.Infof("Starting initialization and precheck for backfilling the database at %s (phase 1 of 4)", cfg.SQLiteDBPath) - var ( - ctx context.Context = context.Background() - currentTipLedger uint32 - dbIsEmpty bool = false - - nBackfill uint32 = cfg.HistoryRetentionWindow - localDbReader db.LedgerReader = db.NewLedgerReader(localDbConn) - metaInfo backfillMeta = backfillMeta{ - ctx: ctx, - logger: logger, - rw: localDbRW, - dsInfo: dsInfo, - } - ) - backend, err := makeBackend(dsInfo) +// It guarantees the backfill of the most recent cfg.HistoryRetentionWindow ledgers +// Requires that no sequence number gaps exist in the local DB prior to backfilling +func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { + metaInfo.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") + nBackfill := cfg.HistoryRetentionWindow + + backend, err := makeBackend(metaInfo.dsInfo) if err != nil { return errors.Wrap(err, "could not create ledger backend") } metaInfo.dsInfo.backend = backend - defer metaInfo.dsInfo.backend.Close() + defer func() { + if err := metaInfo.dsInfo.backend.Close(); err != nil { + metaInfo.logger.Warnf("error closing ledger backend: %v", err) + } + }() // Determine what ledgers have been written to local DB - ledgerRange, err := localDbReader.GetLedgerRange(metaInfo.ctx) + var dbIsEmpty bool + ledgerRange, err := metaInfo.reader.GetLedgerRange(metaInfo.ctx) if errors.Is(err, db.ErrEmptyDB) { dbIsEmpty = true } else if err != nil { + dbIsEmpty = false return 
errors.Wrap(err, "could not get ledger range from local DB") } maxWrittenLedger, minWrittenLedger := ledgerRange.LastLedger.Sequence, ledgerRange.FirstLedger.Sequence // Phase 1: precheck to ensure no gaps in local DB if !dbIsEmpty { - if err = verifyDbGapless(metaInfo.ctx, metaInfo.logger, localDbReader, minWrittenLedger, maxWrittenLedger); err != nil { + if err = metaInfo.verifyDbGapless(minWrittenLedger, maxWrittenLedger); err != nil { return errors.Wrap(err, "backfill precheck failed") } } else { @@ -70,7 +74,8 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D metaInfo.logger.Infof("Precheck passed! Starting backfill backwards phase (phase 2 of 4)") // Phase 2: backfill backwards from minimum written ledger towards oldest ledger in retention window - if currentTipLedger, err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds); err != nil { + var currentTipLedger uint32 + if currentTipLedger, err = getLatestSeqInCDP(metaInfo.ctx, metaInfo.dsInfo.Ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } metaInfo.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) @@ -89,7 +94,7 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D if lBoundBackwards < rBoundBackwards { metaInfo.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", lBoundBackwards, rBoundBackwards) - if err = runBackfillBackwards(metaInfo, lBoundBackwards, rBoundBackwards); err != nil { + if err = metaInfo.runBackfillBackwards(lBoundBackwards, rBoundBackwards); err != nil { return errors.Wrap(err, "backfill backwards failed") } } else { @@ -98,11 +103,11 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB metaInfo.logger.Infof("Backward backfill of old ledgers complete! 
Starting forward backfill (phase 3 of 4)") - if rBoundForwards, err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds); err != nil { + if rBoundForwards, err = getLatestSeqInCDP(metaInfo.ctx, metaInfo.dsInfo.Ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) - if err = runBackfillForwards(metaInfo, lBoundForwards, rBoundForwards); err != nil { + if err = metaInfo.runBackfillForwards(lBoundForwards, rBoundForwards); err != nil { return errors.Wrap(err, "backfill forwards failed") } @@ -110,11 +115,11 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D metaInfo.logger.Infof("Forward backfill complete, starting post-backfill verification") // Note final ledger we've backfilled to endSeq := rBoundForwards - if currentTipLedger, err = getLatestSeqInCDP(metaInfo.ctx, dsInfo.Ds); err != nil { + if currentTipLedger, err = getLatestSeqInCDP(metaInfo.ctx, metaInfo.dsInfo.Ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } startSeq := max(currentTipLedger-nBackfill+1, 1) - if err = verifyDbGapless(metaInfo.ctx, metaInfo.logger, localDbReader, startSeq, endSeq); err != nil { + if err = metaInfo.verifyDbGapless(startSeq, endSeq); err != nil { return errors.Wrap(err, "post-backfill verification failed") } metaInfo.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", startSeq, endSeq) @@ -123,17 +128,17 @@ func RunBackfill(cfg *config.Config, logger *supportlog.Entry, localDbConn *db.D } // Checks to ensure state of local DB is acceptable for backfilling -func verifyDbGapless(callerCtx context.Context, logger *supportlog.Entry, reader db.LedgerReader, minLedgerSeq uint32, maxLedgerSeq uint32) error { - ctx, cancelPrecheck := context.WithTimeout(callerCtx, 4*time.Minute) +func (metaInfo *BackfillMeta) verifyDbGapless(minLedgerSeq uint32, maxLedgerSeq uint32) error { + ctx, cancelPrecheck := context.WithTimeout(metaInfo.ctx, 4*time.Minute) defer cancelPrecheck() - tx, err := reader.NewTx(ctx) + tx, err := metaInfo.reader.NewTx(ctx) if err != nil { return errors.Wrap(err, "db verify: failed to begin read transaction") } defer func() { if rollbackErr := tx.Done(); rollbackErr != nil { - logger.Warnf("couldn't rollback in verifyDbGapless: %v", rollbackErr) + metaInfo.logger.Warnf("couldn't rollback in verifyDbGapless: %v", rollbackErr) } }() @@ -152,7 +157,7 @@ func verifyDbGapless(callerCtx context.Context, logger *supportlog.Entry, reader // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers -func runBackfillBackwards(metaInfo backfillMeta, lBound uint32, rBound uint32) error { +func (metaInfo *BackfillMeta) runBackfillBackwards(lBound uint32, rBound uint32) error { for rChunkBound := rBound; rChunkBound >= lBound; { if err := metaInfo.ctx.Err(); err != nil { return err @@ -167,7 +172,7 @@ func runBackfillBackwards(metaInfo backfillMeta, lBound uint32, rBound uint32) e lChunkBound := max(lBound, rChunkBound-ChunkSize+1) metaInfo.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := fillChunk(metaInfo, tempBackend, lChunkBound, rChunkBound); err != nil { + if err := metaInfo.fillChunk(tempBackend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) 
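// --- Editor's illustrative sketch (not part of PATCH 02) ---
// The loop above walks [lBound, rBound] from right to left in ChunkSize-wide chunks.
// A minimal, self-contained sketch of that bound arithmetic, with an explicit guard
// against uint32 underflow (an equivalent guard is added later in this series); the
// function name chunkBoundsBackwards is hypothetical and not taken from the patch:
//
//	func chunkBoundsBackwards(lBound, rBound, chunkSize uint32) [][2]uint32 {
//		var chunks [][2]uint32
//		for rChunk := rBound; rChunk >= lBound; {
//			lChunk := lBound
//			if rChunk >= lBound+chunkSize-1 { // avoid wrapping below lBound
//				lChunk = rChunk - chunkSize + 1
//			}
//			chunks = append(chunks, [2]uint32{lChunk, rChunk})
//			if lChunk == lBound {
//				break
//			}
//			rChunk = lChunk - 1
//		}
//		return chunks
//	}
//
// For example, lBound=101, rBound=400, chunkSize=100 yields [301,400], [201,300], [101,200].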
} metaInfo.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", @@ -183,7 +188,7 @@ func runBackfillBackwards(metaInfo backfillMeta, lBound uint32, rBound uint32) e // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards the current ledger tip -func runBackfillForwards(metaInfo backfillMeta, lBound uint32, rBound uint32) error { +func (metaInfo *BackfillMeta) runBackfillForwards(lBound uint32, rBound uint32) error { for lChunkBound := lBound; lChunkBound <= rBound; lChunkBound += ChunkSize { if err := metaInfo.ctx.Err(); err != nil { return err @@ -191,7 +196,7 @@ func runBackfillForwards(metaInfo backfillMeta, lBound uint32, rBound uint32) er rChunkBound := min(rBound, lChunkBound+ChunkSize-1) metaInfo.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := fillChunk(metaInfo, metaInfo.dsInfo.backend, lChunkBound, rChunkBound); err != nil { + if err := metaInfo.fillChunk(metaInfo.dsInfo.backend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } metaInfo.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", @@ -202,7 +207,7 @@ func runBackfillForwards(metaInfo backfillMeta, lBound uint32, rBound uint32) er // Fills a chunk of ledgers [left, right] from the given backend into the local DB // Fills from left to right (i.e. sequence number ascending) -func fillChunk(metaInfo backfillMeta, readBackend ledgerbackend.LedgerBackend, left uint32, right uint32) error { +func (metaInfo *BackfillMeta) fillChunk(readBackend ledgerbackend.LedgerBackend, left uint32, right uint32) error { var ledger xdr.LedgerCloseMeta tx, err := metaInfo.rw.NewTx(metaInfo.ctx) @@ -273,9 +278,10 @@ type DatastoreInfo struct { } // This struct holds the metadata/constructs necessary for most backfilling operations -type backfillMeta struct { +type BackfillMeta struct { ctx context.Context logger *supportlog.Entry rw db.ReadWriter + reader db.LedgerReader dsInfo DatastoreInfo } From 965b479f677ecbc7429159a74bb33f27298f3300 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 19 Dec 2025 02:54:58 -0500 Subject: [PATCH 17/72] added context timeout for main function --- cmd/stellar-rpc/internal/ingest/backfill.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index a002d05e..6c8d26c4 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -38,6 +38,9 @@ func NewBackfillMeta(ctx context.Context, logger *supportlog.Entry, rw db.ReadWr // It guarantees the backfill of the most recent cfg.HistoryRetentionWindow ledgers // Requires that no sequence number gaps exist in the local DB prior to backfilling func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { + ctx, cancelBackfill := context.WithTimeout(metaInfo.ctx, 4*time.Hour) // TODO: determine backfill timeout + defer cancelBackfill() + metaInfo.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") nBackfill := cfg.HistoryRetentionWindow @@ -54,7 +57,7 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { // Determine what ledgers have been written to local DB var dbIsEmpty bool - ledgerRange, err := metaInfo.reader.GetLedgerRange(metaInfo.ctx) + ledgerRange, err := 
metaInfo.reader.GetLedgerRange(ctx) if errors.Is(err, db.ErrEmptyDB) { dbIsEmpty = true } else if err != nil { @@ -75,7 +78,7 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { // Phase 2: backfill backwards from minimum written ledger towards oldest ledger in retention window var currentTipLedger uint32 - if currentTipLedger, err = getLatestSeqInCDP(metaInfo.ctx, metaInfo.dsInfo.Ds); err != nil { + if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.Ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } metaInfo.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) @@ -103,7 +106,7 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB metaInfo.logger.Infof("Backward backfill of old ledgers complete! Starting forward backfill (phase 3 of 4)") - if rBoundForwards, err = getLatestSeqInCDP(metaInfo.ctx, metaInfo.dsInfo.Ds); err != nil { + if rBoundForwards, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.Ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) @@ -115,7 +118,7 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { metaInfo.logger.Infof("Forward backfill complete, starting post-backfill verification") // Note final ledger we've backfilled to endSeq := rBoundForwards - if currentTipLedger, err = getLatestSeqInCDP(metaInfo.ctx, metaInfo.dsInfo.Ds); err != nil { + if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.Ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } startSeq := max(currentTipLedger-nBackfill+1, 1) From b09a5260b1d8762db98bda029338b9ed53814c71 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 19 Dec 2025 11:56:12 -0500 Subject: [PATCH 18/72] further refactoring and edge case guarding --- cmd/stellar-rpc/internal/daemon/daemon.go | 8 +- cmd/stellar-rpc/internal/ingest/backfill.go | 124 +++++++++++--------- 2 files changed, 71 insertions(+), 61 deletions(-) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index c763710c..f76cb43e 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -204,15 +204,11 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { } if cfg.Backfill { backfillMeta := ingest.NewBackfillMeta( - context.Background(), logger, rw, db.NewLedgerReader(daemon.db), - ingest.DatastoreInfo{ - Ds: daemon.dataStore, - Schema: daemon.dataStoreSchema, - Config: cfg.DataStoreConfig, - }, + daemon.dataStore, + daemon.dataStoreSchema, ) if err := backfillMeta.RunBackfill(cfg); err != nil { diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 6c8d26c4..404ef1de 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -21,16 +21,38 @@ const ( OneDayOfLedgers = config.OneDayOfLedgers SevenDayOfLedgers = config.OneDayOfLedgers * 7 // Number of ledgers to read/write at a time during backfill - ChunkSize uint32 = OneDayOfLedgers / 4 // 6 hours. Takes [TODO] minutes to process + ChunkSize uint32 = OneDayOfLedgers / 4 // 6 hours. 
Takes 2.75 minutes to process on an M4 MacBook Pro ) -func NewBackfillMeta(ctx context.Context, logger *supportlog.Entry, rw db.ReadWriter, reader db.LedgerReader, dsInfo DatastoreInfo) BackfillMeta { +// This struct holds the metadata/constructs necessary for most backfilling operations, including +// the local database reader and writer, the cloud datastore info, and a logger. +type BackfillMeta struct { + logger *supportlog.Entry + rw db.ReadWriter + reader db.LedgerReader + dsInfo datastoreInfo +} + +type datastoreInfo struct { + ds datastore.DataStore + schema datastore.DataStoreSchema +} + +// Creates a new BackfillMeta struct +func NewBackfillMeta(logger *supportlog.Entry, + rw db.ReadWriter, + reader db.LedgerReader, + ds datastore.DataStore, + dsSchema datastore.DataStoreSchema, +) BackfillMeta { return BackfillMeta{ - ctx: ctx, logger: logger, rw: rw, reader: reader, - dsInfo: dsInfo, + dsInfo: datastoreInfo{ + ds: ds, + schema: dsSchema, + }, } } @@ -38,23 +60,12 @@ func NewBackfillMeta(ctx context.Context, logger *supportlog.Entry, rw db.ReadWr // It guarantees the backfill of the most recent cfg.HistoryRetentionWindow ledgers // Requires that no sequence number gaps exist in the local DB prior to backfilling func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { - ctx, cancelBackfill := context.WithTimeout(metaInfo.ctx, 4*time.Hour) // TODO: determine backfill timeout + ctx, cancelBackfill := context.WithTimeout(context.Background(), 4*time.Hour) // TODO: determine backfill timeout defer cancelBackfill() metaInfo.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") nBackfill := cfg.HistoryRetentionWindow - backend, err := makeBackend(metaInfo.dsInfo) - if err != nil { - return errors.Wrap(err, "could not create ledger backend") - } - metaInfo.dsInfo.backend = backend - defer func() { - if err := metaInfo.dsInfo.backend.Close(); err != nil { - metaInfo.logger.Warnf("error closing ledger backend: %v", err) - } - }() - // Determine what ledgers have been written to local DB var dbIsEmpty bool ledgerRange, err := metaInfo.reader.GetLedgerRange(ctx) @@ -68,7 +79,7 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { // Phase 1: precheck to ensure no gaps in local DB if !dbIsEmpty { - if err = metaInfo.verifyDbGapless(minWrittenLedger, maxWrittenLedger); err != nil { + if err = metaInfo.verifyDbGapless(ctx, minWrittenLedger, maxWrittenLedger); err != nil { return errors.Wrap(err, "backfill precheck failed") } } else { @@ -78,7 +89,7 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { // Phase 2: backfill backwards from minimum written ledger towards oldest ledger in retention window var currentTipLedger uint32 - if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.Ds); err != nil { + if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } metaInfo.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) @@ -97,7 +108,7 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { if lBoundBackwards < rBoundBackwards { metaInfo.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", lBoundBackwards, rBoundBackwards) - if err = metaInfo.runBackfillBackwards(lBoundBackwards, rBoundBackwards); err != nil { + if err = metaInfo.runBackfillBackwards(ctx, lBoundBackwards, rBoundBackwards); err != nil { return 
errors.Wrap(err, "backfill backwards failed") } } else { @@ -106,23 +117,27 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB metaInfo.logger.Infof("Backward backfill of old ledgers complete! Starting forward backfill (phase 3 of 4)") - if rBoundForwards, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.Ds); err != nil { + if rBoundForwards, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } + // if lBoundForwards > rBoundForwards { metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) - if err = metaInfo.runBackfillForwards(lBoundForwards, rBoundForwards); err != nil { + if err = metaInfo.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { return errors.Wrap(err, "backfill forwards failed") } + // } else { + // metaInfo.logger.Infof("No forwards backfill needed, local DB head already at tip") + // } // Phase 4: verify no gaps in local DB after backfill metaInfo.logger.Infof("Forward backfill complete, starting post-backfill verification") // Note final ledger we've backfilled to endSeq := rBoundForwards - if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.Ds); err != nil { + if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } startSeq := max(currentTipLedger-nBackfill+1, 1) - if err = metaInfo.verifyDbGapless(startSeq, endSeq); err != nil { + if err = metaInfo.verifyDbGapless(ctx, startSeq, endSeq); err != nil { return errors.Wrap(err, "post-backfill verification failed") } metaInfo.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", startSeq, endSeq) @@ -131,8 +146,8 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { } // Checks to ensure state of local DB is acceptable for backfilling -func (metaInfo *BackfillMeta) verifyDbGapless(minLedgerSeq uint32, maxLedgerSeq uint32) error { - ctx, cancelPrecheck := context.WithTimeout(metaInfo.ctx, 4*time.Minute) +func (metaInfo *BackfillMeta) verifyDbGapless(ctx context.Context, minLedgerSeq uint32, maxLedgerSeq uint32) error { + ctx, cancelPrecheck := context.WithTimeout(ctx, 4*time.Minute) defer cancelPrecheck() tx, err := metaInfo.reader.NewTx(ctx) @@ -160,27 +175,31 @@ func (metaInfo *BackfillMeta) verifyDbGapless(minLedgerSeq uint32, maxLedgerSeq // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers -func (metaInfo *BackfillMeta) runBackfillBackwards(lBound uint32, rBound uint32) error { +func (metaInfo *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound uint32, rBound uint32) error { for rChunkBound := rBound; rChunkBound >= lBound; { - if err := metaInfo.ctx.Err(); err != nil { + if err := ctx.Err(); err != nil { return err } // Create temporary backend for backwards-filling chunks + // Note monotonicity constraint of the ledger backend tempBackend, err := makeBackend(metaInfo.dsInfo) if err != nil { return errors.Wrap(err, "couldn't create backend") } - defer tempBackend.Close() lChunkBound := max(lBound, rChunkBound-ChunkSize+1) metaInfo.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := metaInfo.fillChunk(tempBackend, lChunkBound, 
rChunkBound); err != nil { + if err := metaInfo.fillChunk(ctx, tempBackend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } metaInfo.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rChunkBound-lBound)/(rBound-lBound)) + if err := tempBackend.Close(); err != nil { + metaInfo.logger.Warnf("error closing temporary backend: %v", err) + } + if lChunkBound == lBound { break } @@ -191,15 +210,26 @@ func (metaInfo *BackfillMeta) runBackfillBackwards(lBound uint32, rBound uint32) // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards the current ledger tip -func (metaInfo *BackfillMeta) runBackfillForwards(lBound uint32, rBound uint32) error { +func (metaInfo *BackfillMeta) runBackfillForwards(ctx context.Context, lBound uint32, rBound uint32) error { + // Backend for forwards backfill can be persistent over multiple chunks + backend, err := makeBackend(metaInfo.dsInfo) + if err != nil { + return errors.Wrap(err, "could not create ledger backend") + } + defer func() { + if err := backend.Close(); err != nil { + metaInfo.logger.Warnf("error closing ledger backend: %v", err) + } + }() + for lChunkBound := lBound; lChunkBound <= rBound; lChunkBound += ChunkSize { - if err := metaInfo.ctx.Err(); err != nil { + if err := ctx.Err(); err != nil { return err } rChunkBound := min(rBound, lChunkBound+ChunkSize-1) metaInfo.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := metaInfo.fillChunk(metaInfo.dsInfo.backend, lChunkBound, rChunkBound); err != nil { + if err := metaInfo.fillChunk(ctx, backend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } metaInfo.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", @@ -210,10 +240,10 @@ func (metaInfo *BackfillMeta) runBackfillForwards(lBound uint32, rBound uint32) // Fills a chunk of ledgers [left, right] from the given backend into the local DB // Fills from left to right (i.e. 
sequence number ascending) -func (metaInfo *BackfillMeta) fillChunk(readBackend ledgerbackend.LedgerBackend, left uint32, right uint32) error { +func (metaInfo *BackfillMeta) fillChunk(ctx context.Context, readBackend ledgerbackend.LedgerBackend, left uint32, right uint32) error { var ledger xdr.LedgerCloseMeta - tx, err := metaInfo.rw.NewTx(metaInfo.ctx) + tx, err := metaInfo.rw.NewTx(ctx) if err != nil { return errors.Wrap(err, "couldn't create local db write tx") } @@ -225,13 +255,13 @@ func (metaInfo *BackfillMeta) fillChunk(readBackend ledgerbackend.LedgerBackend, }() backfillRange := ledgerbackend.BoundedRange(left, right) - if err := readBackend.PrepareRange(metaInfo.ctx, backfillRange); err != nil { + if err := readBackend.PrepareRange(ctx, backfillRange); err != nil { return errors.Wrapf(err, "couldn't prepare range [%d, %d]", left, right) } for seq := left; seq <= right; seq++ { // Fetch ledger from backend, commit to local DB - ledger, err = readBackend.GetLedger(metaInfo.ctx, seq) + ledger, err = readBackend.GetLedger(ctx, seq) if err != nil { return errors.Wrapf(err, "couldn't get ledger %d from backend", seq) } @@ -246,7 +276,7 @@ func (metaInfo *BackfillMeta) fillChunk(readBackend ledgerbackend.LedgerBackend, } // Creates a buffered storage backend for the given datastore -func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { +func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { backend, err := ledgerbackend.NewBufferedStorageBackend( ledgerbackend.BufferedStorageBackendConfig{ BufferSize: 1024, @@ -254,8 +284,8 @@ func makeBackend(dsInfo DatastoreInfo) (ledgerbackend.LedgerBackend, error) { RetryLimit: 3, RetryWait: 5 * time.Second, }, - dsInfo.Ds, - dsInfo.Schema, + dsInfo.ds, + dsInfo.schema, ) return backend, err } @@ -272,19 +302,3 @@ func getLatestSeqInCDP(callerCtx context.Context, ds datastore.DataStore) (uint3 } return seq, nil } - -type DatastoreInfo struct { - Ds datastore.DataStore - Schema datastore.DataStoreSchema - Config datastore.DataStoreConfig - backend ledgerbackend.LedgerBackend -} - -// This struct holds the metadata/constructs necessary for most backfilling operations -type BackfillMeta struct { - ctx context.Context - logger *supportlog.Entry - rw db.ReadWriter - reader db.LedgerReader - dsInfo DatastoreInfo -} From c440592d73f2f85e9ad4a95a2594dc0cf300bcdd Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 19 Dec 2025 15:13:14 -0500 Subject: [PATCH 19/72] handled rare/unlikely forwards backfill, already written up to tip case --- cmd/stellar-rpc/internal/ingest/backfill.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 404ef1de..1cb0bbca 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -120,14 +120,14 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { if rBoundForwards, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - // if lBoundForwards > rBoundForwards { - metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) - if err = metaInfo.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { - return errors.Wrap(err, "backfill forwards failed") + if lBoundForwards <= rBoundForwards { + metaInfo.logger.Infof("Backfilling to 
current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) + if err = metaInfo.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { + return errors.Wrap(err, "backfill forwards failed") + } + } else { + metaInfo.logger.Infof("No forwards backfill needed, local DB head already at tip") } - // } else { - // metaInfo.logger.Infof("No forwards backfill needed, local DB head already at tip") - // } // Phase 4: verify no gaps in local DB after backfill metaInfo.logger.Infof("Forward backfill complete, starting post-backfill verification") From 7537872a24f3fa161a4ed825f2053a41e0300287 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 19 Dec 2025 15:18:11 -0500 Subject: [PATCH 20/72] handled extremely rare division by zero case --- cmd/stellar-rpc/internal/ingest/backfill.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 1cb0bbca..58f548fd 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -120,13 +120,13 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { if rBoundForwards, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - if lBoundForwards <= rBoundForwards { + if lBoundForwards < rBoundForwards { metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) if err = metaInfo.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { return errors.Wrap(err, "backfill forwards failed") } } else { - metaInfo.logger.Infof("No forwards backfill needed, local DB head already at tip") + metaInfo.logger.Infof("No forwards backfill needed, local DB head already at datastore tip") } // Phase 4: verify no gaps in local DB after backfill From c31a46832149ed8bb2b9ae3ac2477c849d0298f0 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 19 Dec 2025 16:39:46 -0500 Subject: [PATCH 21/72] refactored; fixed history_retention>available ledgers bug; fixed progress log --- cmd/stellar-rpc/internal/daemon/daemon.go | 7 +- cmd/stellar-rpc/internal/ingest/backfill.go | 115 +++++++++++++------- 2 files changed, 83 insertions(+), 39 deletions(-) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index f76cb43e..dfdabb1d 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -203,13 +203,16 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { daemon.dataStore, daemon.dataStoreSchema = mustCreateDataStore(cfg, logger) } if cfg.Backfill { - backfillMeta := ingest.NewBackfillMeta( - logger, + backfillMeta, err := ingest.NewBackfillMeta( + logger.WithField("subservice", "backfill"), rw, db.NewLedgerReader(daemon.db), daemon.dataStore, daemon.dataStoreSchema, ) + if err != nil { + logger.WithError(err).Fatal("failed to create backfill metadata") + } if err := backfillMeta.RunBackfill(cfg); err != nil { logger.WithError(err).Fatal("failed to backfill ledgers") diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 58f548fd..f8de176a 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -21,39 +21,77 @@ const ( OneDayOfLedgers = config.OneDayOfLedgers SevenDayOfLedgers = config.OneDayOfLedgers * 7 // Number of ledgers to 
read/write at a time during backfill - ChunkSize uint32 = OneDayOfLedgers / 4 // 6 hours. Takes 2.75 minutes to process on an M4 MacBook Pro + // 12 hours/17280 ledgers on an M4 MacBook Pro, backfill takes: + // on pubnet: 5.5 minutes; on testnet: + ChunkSize uint32 = OneDayOfLedgers / 2 ) // This struct holds the metadata/constructs necessary for most backfilling operations, including // the local database reader and writer, the cloud datastore info, and a logger. type BackfillMeta struct { logger *supportlog.Entry - rw db.ReadWriter - reader db.LedgerReader dsInfo datastoreInfo + dbInfo databaseInfo } type datastoreInfo struct { - ds datastore.DataStore - schema datastore.DataStoreSchema + ds datastore.DataStore + schema datastore.DataStoreSchema + minWrittenLedger uint32 + maxWrittenLedger uint32 +} + +type databaseInfo struct { + rw db.ReadWriter + reader db.LedgerReader + minWrittenLedger uint32 + maxWrittenLedger uint32 + isEmpty bool } // Creates a new BackfillMeta struct -func NewBackfillMeta(logger *supportlog.Entry, +func NewBackfillMeta( + logger *supportlog.Entry, rw db.ReadWriter, reader db.LedgerReader, ds datastore.DataStore, dsSchema datastore.DataStoreSchema, -) BackfillMeta { +) (BackfillMeta, error) { + // Note: O(L) on number of ledgers in cloud datastore + ctx, cancelInit := context.WithTimeout(context.Background(), 5*time.Second) + defer cancelInit() + + // Query local DB to determine min and max sequence numbers among the written ledgers + var dbIsEmpty bool + ledgerRange, err := reader.GetLedgerRange(ctx) + if errors.Is(err, db.ErrEmptyDB) { + dbIsEmpty = true + } else if err != nil { + return BackfillMeta{}, errors.Wrap(err, "could not get ledger range from local DB") + } + // Query remote datastore to determine min and max sequence numbers among the written ledgers + minWrittenDSLedger, err := datastore.FindOldestLedgerSequence(ctx, ds, dsSchema) + if err != nil { + return BackfillMeta{}, errors.Wrap(err, "could not get oldest ledger sequence from datastore") + } + return BackfillMeta{ logger: logger, - rw: rw, - reader: reader, + dbInfo: databaseInfo{ + rw: rw, + reader: reader, + minWrittenLedger: ledgerRange.FirstLedger.Sequence, + maxWrittenLedger: ledgerRange.LastLedger.Sequence, + isEmpty: dbIsEmpty, + }, dsInfo: datastoreInfo{ - ds: ds, - schema: dsSchema, + ds: ds, + schema: dsSchema, + minWrittenLedger: minWrittenDSLedger, + // Note maxWrittenLedger is excluded because it goes stale every 6 seconds + // it is replaced by `currentTipLedger` in RunBackfill }, - } + }, nil } // This function backfills the local database with ledgers from the datastore @@ -66,20 +104,9 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { metaInfo.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") nBackfill := cfg.HistoryRetentionWindow - // Determine what ledgers have been written to local DB - var dbIsEmpty bool - ledgerRange, err := metaInfo.reader.GetLedgerRange(ctx) - if errors.Is(err, db.ErrEmptyDB) { - dbIsEmpty = true - } else if err != nil { - dbIsEmpty = false - return errors.Wrap(err, "could not get ledger range from local DB") - } - maxWrittenLedger, minWrittenLedger := ledgerRange.LastLedger.Sequence, ledgerRange.FirstLedger.Sequence - // Phase 1: precheck to ensure no gaps in local DB - if !dbIsEmpty { - if err = metaInfo.verifyDbGapless(ctx, minWrittenLedger, maxWrittenLedger); err != nil { + if !metaInfo.dbInfo.isEmpty { + if err := metaInfo.verifyDbGapless(ctx, 
metaInfo.dbInfo.minWrittenLedger, metaInfo.dbInfo.maxWrittenLedger); err != nil { return errors.Wrap(err, "backfill precheck failed") } } else { @@ -89,26 +116,34 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { // Phase 2: backfill backwards from minimum written ledger towards oldest ledger in retention window var currentTipLedger uint32 + var err error if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } metaInfo.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) + // Adjust nBackfill if datastore has fewer ledgers than the retention window + if currentTipLedger < nBackfill { + metaInfo.logger.Warnf("Datastore has fewer ledgers (%d) than retention window (%d); "+ + "backfilling all available ledgers", currentTipLedger, nBackfill) + nBackfill = currentTipLedger + } + // Bounds for ledgers to be written to local DB in backwards and forwards phases var lBoundBackwards, rBoundBackwards uint32 var lBoundForwards, rBoundForwards uint32 - lBoundBackwards = max(currentTipLedger-nBackfill+1, 1) - if dbIsEmpty { + lBoundBackwards = max(currentTipLedger-nBackfill+1, metaInfo.dsInfo.minWrittenLedger) + if metaInfo.dbInfo.isEmpty { rBoundBackwards = currentTipLedger lBoundForwards = rBoundBackwards + 1 } else { - rBoundBackwards = minWrittenLedger - 1 - lBoundForwards = maxWrittenLedger + 1 + rBoundBackwards = metaInfo.dbInfo.minWrittenLedger - 1 + lBoundForwards = metaInfo.dbInfo.maxWrittenLedger + 1 } if lBoundBackwards < rBoundBackwards { metaInfo.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", lBoundBackwards, rBoundBackwards) - if err = metaInfo.runBackfillBackwards(ctx, lBoundBackwards, rBoundBackwards); err != nil { + if err := metaInfo.runBackfillBackwards(ctx, lBoundBackwards, rBoundBackwards); err != nil { return errors.Wrap(err, "backfill backwards failed") } } else { @@ -120,7 +155,7 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { if rBoundForwards, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - if lBoundForwards < rBoundForwards { + if lBoundForwards <= rBoundForwards { metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) if err = metaInfo.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { return errors.Wrap(err, "backfill forwards failed") @@ -136,7 +171,7 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - startSeq := max(currentTipLedger-nBackfill+1, 1) + startSeq := max(currentTipLedger-nBackfill+1, metaInfo.dsInfo.minWrittenLedger) if err = metaInfo.verifyDbGapless(ctx, startSeq, endSeq); err != nil { return errors.Wrap(err, "post-backfill verification failed") } @@ -150,7 +185,7 @@ func (metaInfo *BackfillMeta) verifyDbGapless(ctx context.Context, minLedgerSeq ctx, cancelPrecheck := context.WithTimeout(ctx, 4*time.Minute) defer cancelPrecheck() - tx, err := metaInfo.reader.NewTx(ctx) + tx, err := metaInfo.dbInfo.reader.NewTx(ctx) if err != nil { return errors.Wrap(err, "db verify: failed to begin read transaction") } @@ -187,14 +222,20 @@ func (metaInfo *BackfillMeta) runBackfillBackwards(ctx 
context.Context, lBound u return errors.Wrap(err, "couldn't create backend") } - lChunkBound := max(lBound, rChunkBound-ChunkSize+1) + var lChunkBound uint32 + // Underflow check for chunk bounds + if rChunkBound >= lBound+ChunkSize-1 { + lChunkBound = max(lBound, rChunkBound-ChunkSize+1) + } else { + lChunkBound = lBound + } metaInfo.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) if err := metaInfo.fillChunk(ctx, tempBackend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } metaInfo.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", - lChunkBound, rChunkBound, 100*(rChunkBound-lBound)/(rBound-lBound)) + lChunkBound, rChunkBound, 100*(1-(rChunkBound-lBound)/max(rBound-lBound, 1))) if err := tempBackend.Close(); err != nil { metaInfo.logger.Warnf("error closing temporary backend: %v", err) @@ -233,7 +274,7 @@ func (metaInfo *BackfillMeta) runBackfillForwards(ctx context.Context, lBound ui return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } metaInfo.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", - lChunkBound, rChunkBound, 100*(rChunkBound-lBound)/(rBound-lBound)) + lChunkBound, rChunkBound, 100*(rChunkBound-lBound)/max(rBound-lBound, 1)) } return nil } @@ -243,7 +284,7 @@ func (metaInfo *BackfillMeta) runBackfillForwards(ctx context.Context, lBound ui func (metaInfo *BackfillMeta) fillChunk(ctx context.Context, readBackend ledgerbackend.LedgerBackend, left uint32, right uint32) error { var ledger xdr.LedgerCloseMeta - tx, err := metaInfo.rw.NewTx(ctx) + tx, err := metaInfo.dbInfo.rw.NewTx(ctx) if err != nil { return errors.Wrap(err, "couldn't create local db write tx") } From 7b9ffa7c2159041e09a0770afff7af18ebdddb61 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 19 Dec 2025 16:55:17 -0500 Subject: [PATCH 22/72] minor: fixed accidental integer division rounding bug --- cmd/stellar-rpc/internal/ingest/backfill.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index f8de176a..1b6854d3 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -235,7 +235,7 @@ func (metaInfo *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } metaInfo.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", - lChunkBound, rChunkBound, 100*(1-(rChunkBound-lBound)/max(rBound-lBound, 1))) + lChunkBound, rChunkBound, 100*(rBound-lChunkBound)/max(rBound-lBound, 1)) if err := tempBackend.Close(); err != nil { metaInfo.logger.Warnf("error closing temporary backend: %v", err) From 397f58186be2bb594f3c8b22aa846ce2ae9aa17f Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 2 Jan 2026 16:33:47 -0500 Subject: [PATCH 23/72] abstracted ingestion/chunk filling to service.go, debugging service start post-backfill --- cmd/stellar-rpc/internal/daemon/daemon.go | 16 +- cmd/stellar-rpc/internal/ingest/backfill.go | 231 ++++++++++---------- cmd/stellar-rpc/internal/ingest/service.go | 66 +++++- 3 files changed, 193 insertions(+), 120 deletions(-) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index dfdabb1d..195afdb9 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ 
b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -202,10 +202,12 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { if cfg.ServeLedgersFromDatastore { daemon.dataStore, daemon.dataStoreSchema = mustCreateDataStore(cfg, logger) } + var ingestCfg ingest.Config + daemon.ingestService, ingestCfg = createIngestService(cfg, logger, daemon, feewindows, historyArchive, rw) if cfg.Backfill { backfillMeta, err := ingest.NewBackfillMeta( - logger.WithField("subservice", "backfill"), - rw, + logger, + daemon.ingestService, db.NewLedgerReader(daemon.db), daemon.dataStore, daemon.dataStoreSchema, @@ -218,8 +220,9 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { logger.WithError(err).Fatal("failed to backfill ledgers") } } + // Start ingestion service only after backfill is complete + ingest.StartService(daemon.ingestService, ingestCfg) - daemon.ingestService = createIngestService(cfg, logger, daemon, feewindows, historyArchive, rw) daemon.preflightWorkerPool = createPreflightWorkerPool(cfg, logger, daemon) daemon.jsonRPCHandler = createJSONRPCHandler(cfg, logger, daemon, feewindows) @@ -313,12 +316,12 @@ func createHighperfStellarCoreClient(cfg *config.Config) interfaces.FastCoreClie func createIngestService(cfg *config.Config, logger *supportlog.Entry, daemon *Daemon, feewindows *feewindow.FeeWindows, historyArchive *historyarchive.ArchiveInterface, rw db.ReadWriter, -) *ingest.Service { +) (*ingest.Service, ingest.Config) { onIngestionRetry := func(err error, _ time.Duration) { logger.WithError(err).Error("could not run ingestion. Retrying") } - return ingest.NewService(ingest.Config{ + ingestCfg := ingest.Config{ Logger: logger, DB: rw, NetworkPassPhrase: cfg.NetworkPassphrase, @@ -328,7 +331,8 @@ func createIngestService(cfg *config.Config, logger *supportlog.Entry, daemon *D OnIngestionRetry: onIngestionRetry, Daemon: daemon, FeeWindows: feewindows, - }) + } + return ingest.NewService(ingestCfg), ingestCfg } func createPreflightWorkerPool(cfg *config.Config, logger *supportlog.Entry, daemon *Daemon) *preflight.WorkerPool { diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 1b6854d3..2b58184b 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -10,7 +10,6 @@ import ( "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/support/datastore" supportlog "github.com/stellar/go-stellar-sdk/support/log" - "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/config" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/db" @@ -21,38 +20,41 @@ const ( OneDayOfLedgers = config.OneDayOfLedgers SevenDayOfLedgers = config.OneDayOfLedgers * 7 // Number of ledgers to read/write at a time during backfill - // 12 hours/17280 ledgers on an M4 MacBook Pro, backfill takes: - // on pubnet: 5.5 minutes; on testnet: + // 12 hours/8640 ledgers on an M4 MacBook Pro, backfill takes: + // on pubnet: 5.5 minutes; on testnet: ~2.2 seconds ChunkSize uint32 = OneDayOfLedgers / 2 ) // This struct holds the metadata/constructs necessary for most backfilling operations, including // the local database reader and writer, the cloud datastore info, and a logger. 
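// --- Editor's worked example (not part of PATCH 23) ---
// To make the bound arithmetic in RunBackfill below concrete: suppose the datastore tip is
// ledger 1_000_000, the retention window is 120_960 ledgers (roughly seven days), the local DB
// already holds ledgers [950_000, 990_000], and the datastore reaches back at least that far.
// Ignoring the checkpoint padding and alignment this patch introduces (all numbers hypothetical):
//
//	lBoundBackwards = max(1_000_000-120_960+1, dsMinSeq) = 879_041   // oldest ledger to keep
//	rBoundBackwards = 950_000 - 1                        = 949_999   // just below local DB tail
//	lBoundForwards  = 990_000 + 1                        = 990_001   // just above local DB head
//	rBoundForwards  = current datastore tip              ~ 1_000_000
//
// so phase 2 fills [879_041, 949_999] backwards in chunks, phase 3 fills [990_001, tip]
// forwards, and phase 4 re-verifies the whole resulting range for gaps.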
type BackfillMeta struct { - logger *supportlog.Entry - dsInfo datastoreInfo - dbInfo databaseInfo + logger *supportlog.Entry + ingestService *Service + dsInfo datastoreInfo + dbInfo databaseInfo } type datastoreInfo struct { - ds datastore.DataStore - schema datastore.DataStoreSchema - minWrittenLedger uint32 - maxWrittenLedger uint32 + ds datastore.DataStore + schema datastore.DataStoreSchema + minSeq uint32 + // Note maxSeq is excluded because it goes stale every 6 seconds + // it is replaced by `currentTipLedger` in RunBackfill } +// This struct holds the local database read/write constructs and metadata initially associated with it type databaseInfo struct { - rw db.ReadWriter - reader db.LedgerReader - minWrittenLedger uint32 - maxWrittenLedger uint32 - isEmpty bool + rw db.ReadWriter + reader db.LedgerReader + minSeq uint32 + maxSeq uint32 + isEmpty bool } // Creates a new BackfillMeta struct func NewBackfillMeta( logger *supportlog.Entry, - rw db.ReadWriter, + service *Service, reader db.LedgerReader, ds datastore.DataStore, dsSchema datastore.DataStoreSchema, @@ -76,20 +78,18 @@ func NewBackfillMeta( } return BackfillMeta{ - logger: logger, + logger: service.logger.WithField("component", "backfill"), + ingestService: service, dbInfo: databaseInfo{ - rw: rw, - reader: reader, - minWrittenLedger: ledgerRange.FirstLedger.Sequence, - maxWrittenLedger: ledgerRange.LastLedger.Sequence, - isEmpty: dbIsEmpty, + reader: reader, + minSeq: ledgerRange.FirstLedger.Sequence, + maxSeq: ledgerRange.LastLedger.Sequence, + isEmpty: dbIsEmpty, }, dsInfo: datastoreInfo{ - ds: ds, - schema: dsSchema, - minWrittenLedger: minWrittenDSLedger, - // Note maxWrittenLedger is excluded because it goes stale every 6 seconds - // it is replaced by `currentTipLedger` in RunBackfill + ds: ds, + schema: dsSchema, + minSeq: minWrittenDSLedger, }, }, nil } @@ -97,34 +97,35 @@ func NewBackfillMeta( // This function backfills the local database with ledgers from the datastore // It guarantees the backfill of the most recent cfg.HistoryRetentionWindow ledgers // Requires that no sequence number gaps exist in the local DB prior to backfilling -func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { +func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { ctx, cancelBackfill := context.WithTimeout(context.Background(), 4*time.Hour) // TODO: determine backfill timeout defer cancelBackfill() - metaInfo.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") - nBackfill := cfg.HistoryRetentionWindow + backfill.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") + ledgersInCheckpoint := cfg.CheckpointFrequency + nBackfill := cfg.HistoryRetentionWindow + ledgersInCheckpoint // Phase 1: precheck to ensure no gaps in local DB - if !metaInfo.dbInfo.isEmpty { - if err := metaInfo.verifyDbGapless(ctx, metaInfo.dbInfo.minWrittenLedger, metaInfo.dbInfo.maxWrittenLedger); err != nil { + if !backfill.dbInfo.isEmpty { + if err := backfill.verifyDbGapless(ctx, backfill.dbInfo.minSeq, backfill.dbInfo.maxSeq); err != nil { return errors.Wrap(err, "backfill precheck failed") } } else { - metaInfo.logger.Infof("Local DB is empty, skipping precheck") + backfill.logger.Infof("Local DB is empty, skipping precheck") } - metaInfo.logger.Infof("Precheck passed! Starting backfill backwards phase (phase 2 of 4)") + backfill.logger.Infof("Precheck passed! 
Starting backfill backwards phase (phase 2 of 4)") - // Phase 2: backfill backwards from minimum written ledger towards oldest ledger in retention window + // Phase 2: backfill backwards from minimum written ledger/current tip towards oldest ledger in retention window var currentTipLedger uint32 var err error - if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { + if currentTipLedger, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - metaInfo.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) + backfill.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) // Adjust nBackfill if datastore has fewer ledgers than the retention window if currentTipLedger < nBackfill { - metaInfo.logger.Warnf("Datastore has fewer ledgers (%d) than retention window (%d); "+ + backfill.logger.Warnf("Datastore has fewer ledgers (%d) than retention window (%d); "+ "backfilling all available ledgers", currentTipLedger, nBackfill) nBackfill = currentTipLedger } @@ -132,66 +133,67 @@ func (metaInfo *BackfillMeta) RunBackfill(cfg *config.Config) error { // Bounds for ledgers to be written to local DB in backwards and forwards phases var lBoundBackwards, rBoundBackwards uint32 var lBoundForwards, rBoundForwards uint32 - lBoundBackwards = max(currentTipLedger-nBackfill+1, metaInfo.dsInfo.minWrittenLedger) - if metaInfo.dbInfo.isEmpty { + lBoundBackwards = max(currentTipLedger-nBackfill+1, backfill.dsInfo.minSeq) + if backfill.dbInfo.isEmpty { rBoundBackwards = currentTipLedger lBoundForwards = rBoundBackwards + 1 } else { - rBoundBackwards = metaInfo.dbInfo.minWrittenLedger - 1 - lBoundForwards = metaInfo.dbInfo.maxWrittenLedger + 1 + rBoundBackwards = backfill.dbInfo.minSeq - 1 + lBoundForwards = backfill.dbInfo.maxSeq + 1 } if lBoundBackwards < rBoundBackwards { - metaInfo.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", + backfill.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", lBoundBackwards, rBoundBackwards) - if err := metaInfo.runBackfillBackwards(ctx, lBoundBackwards, rBoundBackwards); err != nil { + if err := backfill.runBackfillBackwards(ctx, lBoundBackwards, rBoundBackwards); err != nil { return errors.Wrap(err, "backfill backwards failed") } } else { - metaInfo.logger.Infof("No backwards backfill needed, local DB tail already covers retention window") + backfill.logger.Infof("No backwards backfill needed, local DB tail already covers retention window") } // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB - metaInfo.logger.Infof("Backward backfill of old ledgers complete! Starting forward backfill (phase 3 of 4)") - if rBoundForwards, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { + backfill.logger.Infof("Backward backfill of old ledgers complete! 
Starting forward backfill (phase 3 of 4)") + if rBoundForwards, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } if lBoundForwards <= rBoundForwards { - metaInfo.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) - if err = metaInfo.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { + rBoundForwards = rBoundForwards - (rBoundForwards % ledgersInCheckpoint) // Align to checkpoint + backfill.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) + if err = backfill.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { return errors.Wrap(err, "backfill forwards failed") } } else { - metaInfo.logger.Infof("No forwards backfill needed, local DB head already at datastore tip") + backfill.logger.Infof("No forwards backfill needed, local DB head already at datastore tip") } // Phase 4: verify no gaps in local DB after backfill - metaInfo.logger.Infof("Forward backfill complete, starting post-backfill verification") + backfill.logger.Infof("Forward backfill complete, starting post-backfill verification") // Note final ledger we've backfilled to endSeq := rBoundForwards - if currentTipLedger, err = getLatestSeqInCDP(ctx, metaInfo.dsInfo.ds); err != nil { + if currentTipLedger, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - startSeq := max(currentTipLedger-nBackfill+1, metaInfo.dsInfo.minWrittenLedger) - if err = metaInfo.verifyDbGapless(ctx, startSeq, endSeq); err != nil { + startSeq := max(currentTipLedger-nBackfill+1, backfill.dsInfo.minSeq) + if err = backfill.verifyDbGapless(ctx, startSeq, endSeq); err != nil { return errors.Wrap(err, "post-backfill verification failed") } - metaInfo.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", startSeq, endSeq) - + backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", startSeq, endSeq) + // time.Sleep(20 * time.Second) return nil } // Checks to ensure state of local DB is acceptable for backfilling -func (metaInfo *BackfillMeta) verifyDbGapless(ctx context.Context, minLedgerSeq uint32, maxLedgerSeq uint32) error { +func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context, minLedgerSeq uint32, maxLedgerSeq uint32) error { ctx, cancelPrecheck := context.WithTimeout(ctx, 4*time.Minute) defer cancelPrecheck() - tx, err := metaInfo.dbInfo.reader.NewTx(ctx) + tx, err := backfill.dbInfo.reader.NewTx(ctx) if err != nil { return errors.Wrap(err, "db verify: failed to begin read transaction") } defer func() { if rollbackErr := tx.Done(); rollbackErr != nil { - metaInfo.logger.Warnf("couldn't rollback in verifyDbGapless: %v", rollbackErr) + backfill.logger.Warnf("couldn't rollback in verifyDbGapless: %v", rollbackErr) } }() @@ -210,14 +212,14 @@ func (metaInfo *BackfillMeta) verifyDbGapless(ctx context.Context, minLedgerSeq // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers -func (metaInfo *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound uint32, rBound uint32) error { +func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound uint32, rBound uint32) error { for rChunkBound := rBound; rChunkBound >= lBound; { if err := ctx.Err(); err != nil { return 
err } // Create temporary backend for backwards-filling chunks // Note monotonicity constraint of the ledger backend - tempBackend, err := makeBackend(metaInfo.dsInfo) + tempBackend, err := makeBackend(backfill.dsInfo) if err != nil { return errors.Wrap(err, "couldn't create backend") } @@ -229,16 +231,16 @@ func (metaInfo *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u } else { lChunkBound = lBound } - metaInfo.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) + backfill.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := metaInfo.fillChunk(ctx, tempBackend, lChunkBound, rChunkBound); err != nil { + if err := backfill.fillChunk(ctx, backfill.ingestService, tempBackend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } - metaInfo.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", + backfill.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rBound-lChunkBound)/max(rBound-lBound, 1)) if err := tempBackend.Close(); err != nil { - metaInfo.logger.Warnf("error closing temporary backend: %v", err) + backfill.logger.Warnf("error closing temporary backend: %v", err) } if lChunkBound == lBound { @@ -251,29 +253,29 @@ func (metaInfo *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards the current ledger tip -func (metaInfo *BackfillMeta) runBackfillForwards(ctx context.Context, lBound uint32, rBound uint32) error { - // Backend for forwards backfill can be persistent over multiple chunks - backend, err := makeBackend(metaInfo.dsInfo) - if err != nil { - return errors.Wrap(err, "could not create ledger backend") - } - defer func() { - if err := backend.Close(); err != nil { - metaInfo.logger.Warnf("error closing ledger backend: %v", err) - } - }() +func (backfill *BackfillMeta) runBackfillForwards(ctx context.Context, lBound uint32, rBound uint32) error { + // // Backend for forwards backfill can be persistent over multiple chunks + // backend, err := makeBackend(backfill.dsInfo) + // if err != nil { + // return errors.Wrap(err, "could not create ledger backend") + // } + // defer func() { + // if err := backend.Close(); err != nil { + // backfill.logger.Warnf("error closing ledger backend: %v", err) + // } + // }() for lChunkBound := lBound; lChunkBound <= rBound; lChunkBound += ChunkSize { if err := ctx.Err(); err != nil { return err } rChunkBound := min(rBound, lChunkBound+ChunkSize-1) - metaInfo.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) + backfill.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := metaInfo.fillChunk(ctx, backend, lChunkBound, rChunkBound); err != nil { + if err := backfill.fillChunk(ctx, backfill.ingestService, backfill.ingestService.ledgerBackend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } - metaInfo.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", + backfill.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rChunkBound-lBound)/max(rBound-lBound, 1)) } return nil @@ -281,38 +283,45 @@ func (metaInfo *BackfillMeta) 
runBackfillForwards(ctx context.Context, lBound ui // Fills a chunk of ledgers [left, right] from the given backend into the local DB // Fills from left to right (i.e. sequence number ascending) -func (metaInfo *BackfillMeta) fillChunk(ctx context.Context, readBackend ledgerbackend.LedgerBackend, left uint32, right uint32) error { - var ledger xdr.LedgerCloseMeta - - tx, err := metaInfo.dbInfo.rw.NewTx(ctx) - if err != nil { - return errors.Wrap(err, "couldn't create local db write tx") - } - // Log rollback errors on failure with where they failed - defer func() { - if rollbackErr := tx.Rollback(); rollbackErr != nil { - metaInfo.logger.Warnf("couldn't rollback in fillChunk: %v", rollbackErr) - } - }() - - backfillRange := ledgerbackend.BoundedRange(left, right) - if err := readBackend.PrepareRange(ctx, backfillRange); err != nil { - return errors.Wrapf(err, "couldn't prepare range [%d, %d]", left, right) - } - - for seq := left; seq <= right; seq++ { - // Fetch ledger from backend, commit to local DB - ledger, err = readBackend.GetLedger(ctx, seq) - if err != nil { - return errors.Wrapf(err, "couldn't get ledger %d from backend", seq) - } - if err = tx.LedgerWriter().InsertLedger(ledger); err != nil { - return errors.Wrapf(err, "couldn't write ledger %d to local db", seq) - } - } - if err := tx.Commit(ledger, nil); err != nil { - return errors.Wrapf(err, "couldn't commit range [%d, %d]", left, right) - } +func (backfill *BackfillMeta) fillChunk( + ctx context.Context, + service *Service, + readBackend ledgerbackend.LedgerBackend, + left, right uint32, +) error { + service.ingestRange(ctx, readBackend, ledgerbackend.BoundedRange(left, right)) + // var ledger xdr.LedgerCloseMeta + + // tx, err := backfill.dbInfo.rw.NewTx(ctx) + // if err != nil { + // return errors.Wrap(err, "couldn't create local db write tx") + // } + // // Log rollback errors on failure with where they failed + // defer func() { + // if rollbackErr := tx.Rollback(); rollbackErr != nil { + // backfill.logger.Warnf("couldn't rollback in fillChunk: %v", rollbackErr) + // } + // }() + + // backfillRange := ledgerbackend.BoundedRange(left, right) + // if err := readBackend.PrepareRange(ctx, backfillRange); err != nil { + // return errors.Wrapf(err, "couldn't prepare range [%d, %d]", left, right) + // } + + // for seq := left; seq <= right; seq++ { + // Fetch ledger from backend, commit to local DB + // ledger, err = readBackend.GetLedger(ctx, seq) + // if err != nil { + // return errors.Wrapf(err, "couldn't get ledger %d from backend", seq) + // } + // if err = tx.LedgerWriter().InsertLedger(ledger); err != nil { + // return errors.Wrapf(err, "couldn't write ledger %d to local db", seq) + // } + + // } + // if err := tx.Commit(ledger, nil); err != nil { + // return errors.Wrapf(err, "couldn't commit range [%d, %d]", left, right) + // } return nil } diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index 3963abfa..de2c0034 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -41,7 +41,7 @@ type Config struct { func NewService(cfg Config) *Service { service := newService(cfg) - startService(service, cfg) + // StartService(service, cfg) return service } @@ -91,7 +91,7 @@ func newService(cfg Config) *Service { return service } -func startService(service *Service, cfg Config) { +func StartService(service *Service, cfg Config) { ctx, done := context.WithCancel(context.Background()) service.done = done service.wg.Add(1) @@ 
-135,6 +135,7 @@ type Service struct { done context.CancelFunc wg sync.WaitGroup metrics Metrics + latestIngestedSeq uint32 } func (s *Service) Close() error { @@ -224,7 +225,66 @@ func (s *Service) ingest(ctx context.Context, sequence uint32) error { s.metrics.ingestionDurationMetric. With(prometheus.Labels{"type": "total"}). Observe(time.Since(startTime).Seconds()) - s.metrics.latestLedgerMetric.Set(float64(sequence)) + if sequence > s.latestIngestedSeq { + s.latestIngestedSeq = sequence + s.metrics.latestLedgerMetric.Set(float64(sequence)) + } + return nil +} + +// Ingests a range of ledgers from a provided ledgerBackend +// Prepares all ledgers in the range if not already prepared +func (s *Service) ingestRange(ctx context.Context, backend backends.LedgerBackend, seqRange backends.Range) error { + s.logger.Infof("Ingesting ledgers [%d, %d]", seqRange.From(), seqRange.To()) + var ledgerCloseMeta xdr.LedgerCloseMeta + + startTime := time.Now() + tx, err := s.db.NewTx(ctx) + if err != nil { + return err + } + + if err := backend.PrepareRange(ctx, seqRange); err != nil { + return err + } + + defer func() { + if err := tx.Rollback(); err != nil { + s.logger.WithError(err).Warn("could not rollback ingest write transactions") + } + }() + + for seq := seqRange.From(); seq <= seqRange.To(); seq++ { + ledgerCloseMeta, err = backend.GetLedger(ctx, seq) + if err != nil { + return err + } + if err := s.ingestLedgerCloseMeta(tx, ledgerCloseMeta); err != nil { + return err + } + } + + durationMetrics := map[string]time.Duration{} + if err := tx.Commit(ledgerCloseMeta, durationMetrics); err != nil { + return err + } + for key, duration := range durationMetrics { + s.metrics.ingestionDurationMetric. + With(prometheus.Labels{"type": key}). + Observe(duration.Seconds()) + } + + s.logger. + WithField("duration", time.Since(startTime).Seconds()). + Debugf("Ingested ledgers [%d, %d]", seqRange.From(), seqRange.To()) + + s.metrics.ingestionDurationMetric. + With(prometheus.Labels{"type": "total"}). 
+ Observe(time.Since(startTime).Seconds()) + if seqRange.To() > s.latestIngestedSeq { + s.latestIngestedSeq = seqRange.To() + s.metrics.latestLedgerMetric.Set(float64(seqRange.To())) + } return nil } From 91f0c732bc159a17c4f9294e7d8557f5b7f77d92 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Mon, 5 Jan 2026 12:34:12 -0500 Subject: [PATCH 24/72] service start post-backfill working --- cmd/stellar-rpc/internal/ingest/backfill.go | 5 ++--- cmd/stellar-rpc/internal/ingest/service.go | 5 +---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 2b58184b..a60bce93 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -105,7 +105,7 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { ledgersInCheckpoint := cfg.CheckpointFrequency nBackfill := cfg.HistoryRetentionWindow + ledgersInCheckpoint - // Phase 1: precheck to ensure no gaps in local DB + // Phase 1: precheck to ensure no pre-existing gaps in local DB if !backfill.dbInfo.isEmpty { if err := backfill.verifyDbGapless(ctx, backfill.dbInfo.minSeq, backfill.dbInfo.maxSeq); err != nil { return errors.Wrap(err, "backfill precheck failed") @@ -173,12 +173,11 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { if currentTipLedger, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - startSeq := max(currentTipLedger-nBackfill+1, backfill.dsInfo.minSeq) + startSeq := max(currentTipLedger-nBackfill+1, backfill.dbInfo.minSeq) if err = backfill.verifyDbGapless(ctx, startSeq, endSeq); err != nil { return errors.Wrap(err, "post-backfill verification failed") } backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", startSeq, endSeq) - // time.Sleep(20 * time.Second) return nil } diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index de2c0034..2bb49ed1 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -233,7 +233,7 @@ func (s *Service) ingest(ctx context.Context, sequence uint32) error { } // Ingests a range of ledgers from a provided ledgerBackend -// Prepares all ledgers in the range if not already prepared +// Does NOT call ingestLedgerCloseMeta for each ledger as these metrics aren't suitable for backfilling func (s *Service) ingestRange(ctx context.Context, backend backends.LedgerBackend, seqRange backends.Range) error { s.logger.Infof("Ingesting ledgers [%d, %d]", seqRange.From(), seqRange.To()) var ledgerCloseMeta xdr.LedgerCloseMeta @@ -259,9 +259,6 @@ func (s *Service) ingestRange(ctx context.Context, backend backends.LedgerBacken if err != nil { return err } - if err := s.ingestLedgerCloseMeta(tx, ledgerCloseMeta); err != nil { - return err - } } durationMetrics := map[string]time.Duration{} From 5e25abfbf52ec758ff0578708338bc2ce9452003 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Mon, 5 Jan 2026 17:51:12 -0500 Subject: [PATCH 25/72] patched verification bug --- cmd/stellar-rpc/internal/db/db.go | 8 +- cmd/stellar-rpc/internal/db/ledger.go | 36 +++--- cmd/stellar-rpc/internal/db/mocks.go | 4 + cmd/stellar-rpc/internal/ingest/backfill.go | 115 ++++++++------------ cmd/stellar-rpc/internal/ingest/service.go | 3 + cmd/stellar-rpc/internal/methods/mocks.go | 10 +- 6 files changed, 79 insertions(+), 97 
deletions(-) diff --git a/cmd/stellar-rpc/internal/db/db.go b/cmd/stellar-rpc/internal/db/db.go index fe5c24fc..af23bc8e 100644 --- a/cmd/stellar-rpc/internal/db/db.go +++ b/cmd/stellar-rpc/internal/db/db.go @@ -156,7 +156,7 @@ func getLatestLedgerSequence(ctx context.Context, ledgerReader LedgerReader, cac // Add missing ledger sequence and close time to the top cache. // Otherwise, the write-through cache won't get updated until the first ingestion commit cache.Lock() - if cache.latestLedgerSeq == 0 { + if cache.latestLedgerSeq < ledgerRange.LastLedger.Sequence { // Only update the cache if the value is missing (0), otherwise // we may end up overwriting the entry with an older version cache.latestLedgerSeq = ledgerRange.LastLedger.Sequence @@ -335,8 +335,10 @@ func (w writeTx) Commit(ledgerCloseMeta xdr.LedgerCloseMeta, durationMetrics map if err := w.tx.Commit(); err != nil { return err } - w.globalCache.latestLedgerSeq = ledgerSeq - w.globalCache.latestLedgerCloseTime = ledgerCloseTime + if ledgerSeq > w.globalCache.latestLedgerSeq { + w.globalCache.latestLedgerSeq = ledgerSeq + w.globalCache.latestLedgerCloseTime = ledgerCloseTime + } return nil } startTime = time.Now() diff --git a/cmd/stellar-rpc/internal/db/ledger.go b/cmd/stellar-rpc/internal/db/ledger.go index db3b38e4..10432ec0 100644 --- a/cmd/stellar-rpc/internal/db/ledger.go +++ b/cmd/stellar-rpc/internal/db/ledger.go @@ -25,6 +25,7 @@ type LedgerReader interface { GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, bool, error) StreamAllLedgers(ctx context.Context, f StreamLedgerFn) error GetLedgerRange(ctx context.Context) (ledgerbucketwindow.LedgerRange, error) + GetLedgerSequencesInRange(ctx context.Context, start uint32, end uint32) ([]uint32, error) StreamLedgerRange(ctx context.Context, startLedger uint32, endLedger uint32, f StreamLedgerFn) error NewTx(ctx context.Context) (LedgerReaderTx, error) GetLatestLedgerSequence(ctx context.Context) (uint32, error) @@ -34,7 +35,6 @@ type LedgerReaderTx interface { GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, bool, error) GetLedgerRange(ctx context.Context) (ledgerbucketwindow.LedgerRange, error) BatchGetLedgers(ctx context.Context, start uint32, end uint32) ([]LedgerMetadataChunk, error) - CountLedgersInRange(ctx context.Context, start uint32, end uint32) (uint32, error) Done() error } @@ -118,23 +118,6 @@ func (l ledgerReaderTx) GetLedger(ctx context.Context, sequence uint32) (xdr.Led return getLedgerFromDB(ctx, l.tx, sequence) } -func (l ledgerReaderTx) CountLedgersInRange(ctx context.Context, start uint32, end uint32) (uint32, error) { - sql := sq.Select("COUNT(*)").From(ledgerCloseMetaTableName). 
- Where(sq.And{ - sq.GtOrEq{"sequence": start}, - sq.LtOrEq{"sequence": end}, - }) - - var ct []uint32 - if err := l.tx.Select(ctx, &ct, sql); err != nil { - return 0, err - } - if len(ct) != 1 { - return 0, fmt.Errorf("expected 1 count result, got %d", len(ct)) - } - return ct[0], nil -} - func (l ledgerReaderTx) Done() error { return l.tx.Rollback() } @@ -226,6 +209,10 @@ func (r ledgerReader) GetLedgerRange(ctx context.Context) (ledgerbucketwindow.Le return getLedgerRangeWithoutCache(ctx, r.db) } +func (r ledgerReader) GetLedgerSequencesInRange(ctx context.Context, start uint32, end uint32) ([]uint32, error) { + return getLedgerSequencesInRange(ctx, r.db, start, end) +} + func (r ledgerReader) GetLatestLedgerSequence(ctx context.Context) (uint32, error) { return getLatestLedgerSequence(ctx, r, r.db.cache) } @@ -291,6 +278,19 @@ func getLedgerRangeWithoutCache(ctx context.Context, db readDB) (ledgerbucketwin }, nil } +func getLedgerSequencesInRange(ctx context.Context, db readDB, start uint32, end uint32) ([]uint32, error) { + sql := sq.Select("sequence").From(ledgerCloseMetaTableName). + Where(sq.And{ + sq.GtOrEq{"sequence": start}, + sq.LtOrEq{"sequence": end}, + }) + var sequences []uint32 + if err := db.Select(ctx, &sequences, sql); err != nil { + return nil, err + } + return sequences, nil +} + type ledgerWriter struct { stmtCache *sq.StmtCache } diff --git a/cmd/stellar-rpc/internal/db/mocks.go b/cmd/stellar-rpc/internal/db/mocks.go index de7c0b39..927280f6 100644 --- a/cmd/stellar-rpc/internal/db/mocks.go +++ b/cmd/stellar-rpc/internal/db/mocks.go @@ -117,6 +117,10 @@ func (m *MockLedgerReader) NewTx(_ context.Context) (LedgerReaderTx, error) { return nil, errors.New("mock NewTx error") } +func (m *MockLedgerReader) GetLedgerSequencesInRange(_ context.Context, _, _ uint32) ([]uint32, error) { + return nil, nil +} + var ( _ TransactionReader = &MockTransactionHandler{} _ TransactionWriter = &MockTransactionHandler{} diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index a60bce93..45263400 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -23,6 +23,8 @@ const ( // 12 hours/8640 ledgers on an M4 MacBook Pro, backfill takes: // on pubnet: 5.5 minutes; on testnet: ~2.2 seconds ChunkSize uint32 = OneDayOfLedgers / 2 + // Acceptable number of ledgers that may be missing from the backfill tail/head + ledgerThreshold uint32 = 384 // six checkpoints/~30 minutes of ledgers ) // This struct holds the metadata/constructs necessary for most backfilling operations, including @@ -59,7 +61,6 @@ func NewBackfillMeta( ds datastore.DataStore, dsSchema datastore.DataStoreSchema, ) (BackfillMeta, error) { - // Note: O(L) on number of ledgers in cloud datastore ctx, cancelInit := context.WithTimeout(context.Background(), 5*time.Second) defer cancelInit() @@ -103,11 +104,11 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { backfill.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") ledgersInCheckpoint := cfg.CheckpointFrequency - nBackfill := cfg.HistoryRetentionWindow + ledgersInCheckpoint + nBackfill := cfg.HistoryRetentionWindow // Phase 1: precheck to ensure no pre-existing gaps in local DB if !backfill.dbInfo.isEmpty { - if err := backfill.verifyDbGapless(ctx, backfill.dbInfo.minSeq, backfill.dbInfo.maxSeq); err != nil { + if _, _, err := backfill.verifyDbGapless(ctx); err != nil { return errors.Wrap(err, "backfill 
precheck failed") } } else { @@ -147,6 +148,7 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { if err := backfill.runBackfillBackwards(ctx, lBoundBackwards, rBoundBackwards); err != nil { return errors.Wrap(err, "backfill backwards failed") } + backfill.dbInfo.minSeq = lBoundBackwards } else { backfill.logger.Infof("No backwards backfill needed, local DB tail already covers retention window") } @@ -156,7 +158,7 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { if rBoundForwards, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - if lBoundForwards <= rBoundForwards { + if lBoundForwards < rBoundForwards { rBoundForwards = rBoundForwards - (rBoundForwards % ledgersInCheckpoint) // Align to checkpoint backfill.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) if err = backfill.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { @@ -165,48 +167,51 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { } else { backfill.logger.Infof("No forwards backfill needed, local DB head already at datastore tip") } + // Log minimum written sequence after backwards backfill + backfill.dbInfo.maxSeq = max(rBoundForwards, backfill.dbInfo.maxSeq) // Phase 4: verify no gaps in local DB after backfill backfill.logger.Infof("Forward backfill complete, starting post-backfill verification") - // Note final ledger we've backfilled to - endSeq := rBoundForwards - if currentTipLedger, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { - return errors.Wrap(err, "could not get latest ledger number from cloud datastore") - } - startSeq := max(currentTipLedger-nBackfill+1, backfill.dbInfo.minSeq) - if err = backfill.verifyDbGapless(ctx, startSeq, endSeq); err != nil { + minSeq, maxSeq, err := backfill.verifyDbGapless(ctx) + count := maxSeq - minSeq + 1 + if err != nil { return errors.Wrap(err, "post-backfill verification failed") } - backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", startSeq, endSeq) + if count+ledgerThreshold < nBackfill { + return fmt.Errorf("post-backfill verification failed: expected at least %d ledgers, got %d ledgers"+ + " (exceeds acceptable threshold of %d ledgers)", nBackfill, count, ledgerThreshold) + } + backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", minSeq, maxSeq) return nil } // Checks to ensure state of local DB is acceptable for backfilling -func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context, minLedgerSeq uint32, maxLedgerSeq uint32) error { +func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context) (uint32, uint32, error) { ctx, cancelPrecheck := context.WithTimeout(ctx, 4*time.Minute) defer cancelPrecheck() - tx, err := backfill.dbInfo.reader.NewTx(ctx) + ledgerRange, err := backfill.dbInfo.reader.GetLedgerRange(ctx) if err != nil { - return errors.Wrap(err, "db verify: failed to begin read transaction") + return 0, 0, errors.Wrap(err, "db verify: could not get ledger range") } - defer func() { - if rollbackErr := tx.Done(); rollbackErr != nil { - backfill.logger.Warnf("couldn't rollback in verifyDbGapless: %v", rollbackErr) - } - }() + // Get sequence number of highest/lowest ledgers in local DB + minDbSeq, maxDbSeq := ledgerRange.FirstLedger.Sequence, ledgerRange.LastLedger.Sequence + backfill.logger.Infof("DB verify: checking for gaps in 
[%d, %d]", + minDbSeq, maxDbSeq) + expectedCount := maxDbSeq - minDbSeq + 1 - count, err := tx.CountLedgersInRange(ctx, minLedgerSeq, maxLedgerSeq) + sequences, err := backfill.dbInfo.reader.GetLedgerSequencesInRange(ctx, minDbSeq, maxDbSeq) if err != nil { - return errors.Wrap(err, "db verify: could not count ledgers in local DB") + return 0, 0, errors.Wrap(err, "db verify: could not get ledger sequences in local DB") } + sequencesMin, sequencesMax := sequences[0], sequences[len(sequences)-1] - if count != maxLedgerSeq-minLedgerSeq+1 { - return fmt.Errorf("db verify: gap detected in local DB: expected %d ledgers, got %d ledgers", - maxLedgerSeq-minLedgerSeq+1, count) + if len(sequences) != int(expectedCount) { + return 0, 0, fmt.Errorf("db verify: gap detected in local DB: expected %d ledgers, got %d ledgers", + expectedCount, len(sequences)) } - return nil + return sequencesMin, sequencesMax, nil } // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore @@ -253,25 +258,26 @@ func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards the current ledger tip func (backfill *BackfillMeta) runBackfillForwards(ctx context.Context, lBound uint32, rBound uint32) error { - // // Backend for forwards backfill can be persistent over multiple chunks - // backend, err := makeBackend(backfill.dsInfo) - // if err != nil { - // return errors.Wrap(err, "could not create ledger backend") - // } - // defer func() { - // if err := backend.Close(); err != nil { - // backfill.logger.Warnf("error closing ledger backend: %v", err) - // } - // }() + // Backend for forwards backfill can be persistent over multiple chunks + backend, err := makeBackend(backfill.dsInfo) + if err != nil { + return errors.Wrap(err, "could not create ledger backend") + } + defer func() { + if err := backend.Close(); err != nil { + backfill.logger.Warnf("error closing ledger backend: %v", err) + } + }() for lChunkBound := lBound; lChunkBound <= rBound; lChunkBound += ChunkSize { if err := ctx.Err(); err != nil { return err } + rChunkBound := min(rBound, lChunkBound+ChunkSize-1) backfill.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := backfill.fillChunk(ctx, backfill.ingestService, backfill.ingestService.ledgerBackend, lChunkBound, rChunkBound); err != nil { + if err := backfill.fillChunk(ctx, backfill.ingestService, backend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } backfill.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", @@ -288,40 +294,7 @@ func (backfill *BackfillMeta) fillChunk( readBackend ledgerbackend.LedgerBackend, left, right uint32, ) error { - service.ingestRange(ctx, readBackend, ledgerbackend.BoundedRange(left, right)) - // var ledger xdr.LedgerCloseMeta - - // tx, err := backfill.dbInfo.rw.NewTx(ctx) - // if err != nil { - // return errors.Wrap(err, "couldn't create local db write tx") - // } - // // Log rollback errors on failure with where they failed - // defer func() { - // if rollbackErr := tx.Rollback(); rollbackErr != nil { - // backfill.logger.Warnf("couldn't rollback in fillChunk: %v", rollbackErr) - // } - // }() - - // backfillRange := ledgerbackend.BoundedRange(left, right) - // if err := readBackend.PrepareRange(ctx, backfillRange); err != nil { - // return errors.Wrapf(err, 
"couldn't prepare range [%d, %d]", left, right) - // } - - // for seq := left; seq <= right; seq++ { - // Fetch ledger from backend, commit to local DB - // ledger, err = readBackend.GetLedger(ctx, seq) - // if err != nil { - // return errors.Wrapf(err, "couldn't get ledger %d from backend", seq) - // } - // if err = tx.LedgerWriter().InsertLedger(ledger); err != nil { - // return errors.Wrapf(err, "couldn't write ledger %d to local db", seq) - // } - - // } - // if err := tx.Commit(ledger, nil); err != nil { - // return errors.Wrapf(err, "couldn't commit range [%d, %d]", left, right) - // } - return nil + return service.ingestRange(ctx, readBackend, ledgerbackend.BoundedRange(left, right)) } // Creates a buffered storage backend for the given datastore diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index 2bb49ed1..68663c27 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -259,6 +259,9 @@ func (s *Service) ingestRange(ctx context.Context, backend backends.LedgerBacken if err != nil { return err } + if err := tx.LedgerWriter().InsertLedger(ledgerCloseMeta); err != nil { + return err + } } durationMetrics := map[string]time.Duration{} diff --git a/cmd/stellar-rpc/internal/methods/mocks.go b/cmd/stellar-rpc/internal/methods/mocks.go index 1d152ccf..ae17cb72 100644 --- a/cmd/stellar-rpc/internal/methods/mocks.go +++ b/cmd/stellar-rpc/internal/methods/mocks.go @@ -38,6 +38,11 @@ func (m *MockLedgerReader) GetLedgerRange(ctx context.Context) (ledgerbucketwind return args.Get(0).(ledgerbucketwindow.LedgerRange), args.Error(1) //nolint:forcetypeassert } +func (m *MockLedgerReader) GetLedgerSequencesInRange(ctx context.Context, start, end uint32) ([]uint32, error) { + args := m.Called(ctx, start, end) + return args.Get(0).([]uint32), args.Error(1) //nolint:forcetypeassert +} + func (m *MockLedgerReader) StreamLedgerRange(ctx context.Context, startLedger, endLedger uint32, f db.StreamLedgerFn, ) error { @@ -69,11 +74,6 @@ func (m *MockLedgerReaderTx) BatchGetLedgers(ctx context.Context, start, end uin return args.Get(0).([]db.LedgerMetadataChunk), args.Error(1) //nolint:forcetypeassert } -func (m *MockLedgerReaderTx) CountLedgersInRange(ctx context.Context, start, end uint32) (uint32, error) { - args := m.Called(ctx, start, end) - return args.Get(0).(uint32), args.Error(1) //nolint:forcetypeassert -} - func (m *MockLedgerReaderTx) GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, bool, error) { args := m.Called(ctx, sequence) return args.Get(0).(xdr.LedgerCloseMeta), args.Bool(1), args.Error(2) //nolint:forcetypeassert From c4241edd13d69096099779672301c6b5657501b4 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 6 Jan 2026 12:05:56 -0500 Subject: [PATCH 26/72] updated git latest ledger test interface --- cmd/stellar-rpc/internal/methods/get_latest_ledger_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmd/stellar-rpc/internal/methods/get_latest_ledger_test.go b/cmd/stellar-rpc/internal/methods/get_latest_ledger_test.go index 369c17c8..a7f6d6c2 100644 --- a/cmd/stellar-rpc/internal/methods/get_latest_ledger_test.go +++ b/cmd/stellar-rpc/internal/methods/get_latest_ledger_test.go @@ -33,6 +33,10 @@ func (ledgerReader *ConstantLedgerReader) GetLedgerRange(_ context.Context) (led return ledgerbucketwindow.LedgerRange{}, nil } +func (ledgerReader *ConstantLedgerReader) GetLedgerSequencesInRange(_ context.Context, _, _ uint32) ([]uint32, error) { + 
return nil, nil +} + func (ledgerReader *ConstantLedgerReader) NewTx(_ context.Context) (db.LedgerReaderTx, error) { return nil, errors.New("mock NewTx error") } From 7e733c9d490977b985ef7f9b5b3d7f6b5aaa4d2e Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 6 Jan 2026 15:49:52 -0500 Subject: [PATCH 27/72] repaired service test following startService refactor --- cmd/stellar-rpc/internal/ingest/service_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/stellar-rpc/internal/ingest/service_test.go b/cmd/stellar-rpc/internal/ingest/service_test.go index 4d0067d9..68ca329e 100644 --- a/cmd/stellar-rpc/internal/ingest/service_test.go +++ b/cmd/stellar-rpc/internal/ingest/service_test.go @@ -54,6 +54,7 @@ func TestRetryRunningIngestion(t *testing.T) { Daemon: interfaces.MakeNoOpDeamon(), } service := NewService(config) + StartService(service, config) retryWg.Wait() service.Close() assert.Equal(t, 1, numRetries) From 484f3606934aae9bd03122e37673e852ad4e6455 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 6 Jan 2026 15:51:27 -0500 Subject: [PATCH 28/72] added backfill gap detection unit test --- .../internal/ingest/backfill_test.go | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 cmd/stellar-rpc/internal/ingest/backfill_test.go diff --git a/cmd/stellar-rpc/internal/ingest/backfill_test.go b/cmd/stellar-rpc/internal/ingest/backfill_test.go new file mode 100644 index 00000000..6af8d335 --- /dev/null +++ b/cmd/stellar-rpc/internal/ingest/backfill_test.go @@ -0,0 +1,80 @@ +package ingest + +import ( + "context" + "path" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stellar/go-stellar-sdk/network" + supportlog "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/daemon/interfaces" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/db" +) + +func TestGapDetection(t *testing.T) { + ctx := context.Background() + testLogger := supportlog.New() + + tmp := t.TempDir() + dbPath := path.Join(tmp, "test.sqlite") + testDB, err := db.OpenSQLiteDB(dbPath) + require.NoError(t, err) + defer testDB.Close() + + rw := db.NewReadWriter(testLogger, testDB, interfaces.MakeNoOpDeamon(), 10, 10, + network.FutureNetworkPassphrase) + + writeTx, err := rw.NewTx(ctx) + require.NoError(t, err) + + // Missing ledger 103 + ledgers := []xdr.LedgerCloseMeta{ + createLedger(100), + createLedger(101), + createLedger(102), + createLedger(104), + createLedger(105), + } + for _, ledger := range ledgers { + require.NoError(t, writeTx.LedgerWriter().InsertLedger(ledger)) + } + require.NoError(t, writeTx.Commit(ledgers[len(ledgers)-1], nil)) + backfill := &BackfillMeta{ + logger: testLogger, + dbInfo: databaseInfo{rw: rw, reader: db.NewLedgerReader(testDB)}, + } + _, _, err = backfill.verifyDbGapless(ctx) + require.Error(t, err) + require.ErrorContains(t, err, "gap detected in local DB") + + // Now insert the missing ledger and verify no gap is detected + writeTx, err = rw.NewTx(ctx) + require.NoError(t, err) + require.NoError(t, writeTx.LedgerWriter().InsertLedger(createLedger(103))) + require.NoError(t, writeTx.Commit(ledgers[len(ledgers)-1], nil)) + + _, _, err = backfill.verifyDbGapless(ctx) + require.NoError(t, err) +} + +func createLedger(ledgerSequence uint32) xdr.LedgerCloseMeta { + return xdr.LedgerCloseMeta{ + V: 1, + V1: &xdr.LedgerCloseMetaV1{ + LedgerHeader: xdr.LedgerHeaderHistoryEntry{ + Hash: xdr.Hash{}, + Header: xdr.LedgerHeader{ + 
LedgerSeq: xdr.Uint32(ledgerSequence), + }, + }, + TxSet: xdr.GeneralizedTransactionSet{ + V: 1, + V1TxSet: &xdr.TransactionSetV1{}, + }, + }, + } +} From 3bdc2401167dce0c3519a0b1a1d6bc5faf69e22d Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 7 Jan 2026 21:36:00 -0500 Subject: [PATCH 29/72] increased robustness, fixed post-backfill ingestion bug --- cmd/stellar-rpc/internal/daemon/daemon.go | 3 +++ cmd/stellar-rpc/internal/db/db.go | 7 +++++++ cmd/stellar-rpc/internal/feewindow/feewindow.go | 14 ++++++++++++++ cmd/stellar-rpc/internal/ingest/backfill.go | 4 ++-- cmd/stellar-rpc/internal/ingest/backfill_test.go | 2 +- .../ledgerbucketwindow/ledgerbucketwindow.go | 6 ++++++ 6 files changed, 33 insertions(+), 3 deletions(-) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index 195afdb9..a9d9bb5a 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -219,6 +219,9 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { if err := backfillMeta.RunBackfill(cfg); err != nil { logger.WithError(err).Fatal("failed to backfill ledgers") } + // Clear the DB cache and fee windows so they re-populate from the database + daemon.db.ResetCache() + feewindows.Reset() } // Start ingestion service only after backfill is complete ingest.StartService(daemon.ingestService, ingestCfg) diff --git a/cmd/stellar-rpc/internal/db/db.go b/cmd/stellar-rpc/internal/db/db.go index af23bc8e..49cd7f7f 100644 --- a/cmd/stellar-rpc/internal/db/db.go +++ b/cmd/stellar-rpc/internal/db/db.go @@ -58,6 +58,13 @@ type DB struct { cache *dbCache } +func (d *DB) ResetCache() { + d.cache.Lock() + d.cache.latestLedgerSeq = 0 + d.cache.latestLedgerCloseTime = 0 + d.cache.Unlock() +} + func openSQLiteDB(dbFilePath string) (*db.Session, error) { // 1. Use Write-Ahead Logging (WAL). // 2. 
Disable WAL auto-checkpointing (we will do the checkpointing ourselves with wal_checkpoint pragmas diff --git a/cmd/stellar-rpc/internal/feewindow/feewindow.go b/cmd/stellar-rpc/internal/feewindow/feewindow.go index 4faefbae..77ae56c1 100644 --- a/cmd/stellar-rpc/internal/feewindow/feewindow.go +++ b/cmd/stellar-rpc/internal/feewindow/feewindow.go @@ -64,6 +64,14 @@ func (fw *FeeWindow) AppendLedgerFees(fees ledgerbucketwindow.LedgerBucket[[]uin return nil } +// Reset clears all ledger fees from the window +func (fw *FeeWindow) Reset() { + fw.lock.Lock() + defer fw.lock.Unlock() + fw.feesPerLedger.Reset() + fw.distribution = FeeDistribution{} +} + func computeFeeDistribution(fees []uint64, ledgerCount uint32) FeeDistribution { if len(fees) == 0 { return FeeDistribution{} @@ -207,6 +215,12 @@ func (fw *FeeWindows) IngestFees(meta xdr.LedgerCloseMeta) error { return nil } +// Reset clears all fee windows +func (fw *FeeWindows) Reset() { + fw.SorobanInclusionFeeWindow.Reset() + fw.ClassicFeeWindow.Reset() +} + func (fw *FeeWindows) AsMigration(seqRange db.LedgerSeqRange) db.Migration { return &feeWindowMigration{ firstLedger: seqRange.First, diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 45263400..35bab458 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -178,8 +178,8 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { return errors.Wrap(err, "post-backfill verification failed") } if count+ledgerThreshold < nBackfill { - return fmt.Errorf("post-backfill verification failed: expected at least %d ledgers, got %d ledgers"+ - " (exceeds acceptable threshold of %d ledgers)", nBackfill, count, ledgerThreshold) + return errors.New(fmt.Sprintf("post-backfill verification failed: expected at least %d ledgers, "+ + "got %d ledgers (exceeds acceptable threshold of %d ledgers)", nBackfill, count, ledgerThreshold)) } backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", minSeq, maxSeq) return nil diff --git a/cmd/stellar-rpc/internal/ingest/backfill_test.go b/cmd/stellar-rpc/internal/ingest/backfill_test.go index 6af8d335..244f4ffb 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill_test.go +++ b/cmd/stellar-rpc/internal/ingest/backfill_test.go @@ -26,7 +26,7 @@ func TestGapDetection(t *testing.T) { defer testDB.Close() rw := db.NewReadWriter(testLogger, testDB, interfaces.MakeNoOpDeamon(), 10, 10, - network.FutureNetworkPassphrase) + network.TestNetworkPassphrase) writeTx, err := rw.NewTx(ctx) require.NoError(t, err) diff --git a/cmd/stellar-rpc/internal/ledgerbucketwindow/ledgerbucketwindow.go b/cmd/stellar-rpc/internal/ledgerbucketwindow/ledgerbucketwindow.go index baf88014..4e0ca509 100644 --- a/cmd/stellar-rpc/internal/ledgerbucketwindow/ledgerbucketwindow.go +++ b/cmd/stellar-rpc/internal/ledgerbucketwindow/ledgerbucketwindow.go @@ -110,3 +110,9 @@ func (w *LedgerBucketWindow[T]) Get(i uint32) *LedgerBucket[T] { index := (w.start + i) % length return &w.buckets[index] } + +// Reset clears all buckets from the window +func (w *LedgerBucketWindow[T]) Reset() { + w.buckets = w.buckets[:0] + w.start = 0 +} From e9470d927a5f322827fad7d1466e275f0efc037d Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 7 Jan 2026 21:36:40 -0500 Subject: [PATCH 30/72] added buggy integration test --- .../internal/integrationtest/backfill_test.go | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 
cmd/stellar-rpc/internal/integrationtest/backfill_test.go diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go new file mode 100644 index 00000000..3c39273f --- /dev/null +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -0,0 +1,102 @@ +package integrationtest + +import ( + "context" + "encoding/json" + "fmt" + "testing" + "time" + + "github.com/fsouza/fake-gcs-server/fakestorage" + "github.com/stretchr/testify/require" + + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + protocol "github.com/stellar/go-stellar-sdk/protocols/rpc" + "github.com/stellar/go-stellar-sdk/support/datastore" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/config" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/integrationtest/infrastructure" +) + +func TestBackfillEmptyDB(t *testing.T) { + var startSeq, endSeq uint32 = 2, 900 + fmt.Printf("hello from test!!\n") + // setup fake GCS server + opts := fakestorage.Options{ + Scheme: "http", + PublicHost: "127.0.0.1", + } + gcsServer, err := fakestorage.NewServerWithOptions(opts) + require.NoError(t, err, "failed to start fake GCS server") + defer gcsServer.Stop() + + // t.Setenv("STELLAR_RPC_INTEGRATION_TESTS_ENABLED", "1") + t.Setenv("STORAGE_EMULATOR_HOST", gcsServer.URL()) + bucketName := "test-bucket" + gcsServer.CreateBucketWithOpts(fakestorage.CreateBucketOpts{Name: bucketName}) + + // datastore config + schema := datastore.DataStoreSchema{ + FilesPerPartition: 1, + LedgersPerFile: 1, + } + makeDatastoreConfig := func(cfg *config.Config) { + // configure for backfill + cfg.ServeLedgersFromDatastore = true + cfg.Backfill = true + cfg.BufferedStorageBackendConfig = ledgerbackend.BufferedStorageBackendConfig{ + BufferSize: 15, + NumWorkers: 2, + } + cfg.DataStoreConfig = datastore.DataStoreConfig{ + Type: "GCS", + Params: map[string]string{"destination_bucket_path": bucketName}, + Schema: schema, + } + cfg.HistoryRetentionWindow = 100 + cfg.ClassicFeeStatsLedgerRetentionWindow = 100 + cfg.SorobanFeeStatsLedgerRetentionWindow = 100 + } + + // add files to GCS + for seq := startSeq; seq <= endSeq; seq++ { + gcsServer.CreateObject(fakestorage.Object{ + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: bucketName, + Name: schema.GetObjectKeyFromSequenceNumber(seq), + }, + Content: createLCMBatchBuffer(seq), + }) + } + + fmt.Printf("hello from before!!\n") + // Skip applying protocol limit upgrades to avoid requiring specific XDR files + // skipLimits := "" + test := infrastructure.NewTest(t, &infrastructure.TestConfig{ + DatastoreConfigFunc: makeDatastoreConfig, + NoParallel: true, + }) + fmt.Printf("hello after!!\n") + // Wait for backfill to complete + time.Sleep(20 * time.Second) + + // Verify backfill worked by querying ledgers + client := test.GetRPCLient() + result, err := client.GetLedgers(context.Background(), protocol.GetLedgersRequest{ + StartLedger: startSeq, + Pagination: &protocol.LedgerPaginationOptions{ + Limit: uint(endSeq - startSeq + 1), + }, + }) + b, _ := json.MarshalIndent(result, "", " ") + t.Logf("result:\n%s", string(b)) + require.NoError(t, err) + require.Len(t, result.Ledgers, int(endSeq-startSeq+1), + "expected to get %d contiguous ledgers from backfill", endSeq-startSeq+1) + // ensure contiguous + for i, ledger := range result.Ledgers { + expectedSeq := startSeq + uint32(i) + require.Equal(t, expectedSeq, ledger.Sequence, + "gap detected at position %d: expected %d, got %d", i, expectedSeq, ledger.Sequence) + } 
+} From 612f33f7397fb98f007a0667b656d1bdf281c3e4 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 8 Jan 2026 16:26:38 -0500 Subject: [PATCH 31/72] integration test bones working --- .../internal/integrationtest/backfill_test.go | 113 ++++++++++++------ .../integrationtest/infrastructure/test.go | 5 + 2 files changed, 84 insertions(+), 34 deletions(-) diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index 3c39273f..03243633 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -2,8 +2,6 @@ package integrationtest import ( "context" - "encoding/json" - "fmt" "testing" "time" @@ -19,8 +17,9 @@ import ( ) func TestBackfillEmptyDB(t *testing.T) { - var startSeq, endSeq uint32 = 2, 900 - fmt.Printf("hello from test!!\n") + // Seed datastore with ledgers to backfill + var backfillStart, backfillEnd uint32 = 2, 64 + // setup fake GCS server opts := fakestorage.Options{ Scheme: "http", @@ -29,19 +28,44 @@ func TestBackfillEmptyDB(t *testing.T) { gcsServer, err := fakestorage.NewServerWithOptions(opts) require.NoError(t, err, "failed to start fake GCS server") defer gcsServer.Stop() - - // t.Setenv("STELLAR_RPC_INTEGRATION_TESTS_ENABLED", "1") - t.Setenv("STORAGE_EMULATOR_HOST", gcsServer.URL()) bucketName := "test-bucket" - gcsServer.CreateBucketWithOpts(fakestorage.CreateBucketOpts{Name: bucketName}) + t.Setenv("STORAGE_EMULATOR_HOST", gcsServer.URL()) + gcsServer.CreateBucketWithOpts(fakestorage.CreateBucketOpts{Name: bucketName}) + objPrefix := "v1/ledgers/testnet" + bucketPath := bucketName + "/" + objPrefix // datastore config schema := datastore.DataStoreSchema{ - FilesPerPartition: 1, + FilesPerPartition: 64000, LedgersPerFile: 1, + // FileExtension: "zst", // SDK adds .xdr automatically } + + // // Create manifest file + // manifest := map[string]interface{}{ + // "version": "1.0", + // "ledgers_per_file": int(schema.LedgersPerFile), + // "files_per_partition": int(schema.FilesPerPartition), + // "file_extension": ".zst", + // "network_passphrase": "Test SDF Network ; September 2015", + // "compression": "zstd", + // } + // manifestBytes, err := json.Marshal(manifest) + // require.NoError(t, err) + + // t.Logf("Creating manifest: %s", string(manifestBytes)) + + // gcsServer.CreateObject(fakestorage.Object{ + // ObjectAttrs: fakestorage.ObjectAttrs{ + // BucketName: bucketName, + // Name: ".config.json", + // }, + // Content: manifestBytes, + // }) + + // Configure with backfill enabled and retention window of 128 ledgers + retentionWindow := uint32(48) makeDatastoreConfig := func(cfg *config.Config) { - // configure for backfill cfg.ServeLedgersFromDatastore = true cfg.Backfill = true cfg.BufferedStorageBackendConfig = ledgerbackend.BufferedStorageBackendConfig{ @@ -50,53 +74,74 @@ func TestBackfillEmptyDB(t *testing.T) { } cfg.DataStoreConfig = datastore.DataStoreConfig{ Type: "GCS", - Params: map[string]string{"destination_bucket_path": bucketName}, - Schema: schema, + Params: map[string]string{"destination_bucket_path": bucketPath}, + Schema: schema, // Provide schema in config } - cfg.HistoryRetentionWindow = 100 - cfg.ClassicFeeStatsLedgerRetentionWindow = 100 - cfg.SorobanFeeStatsLedgerRetentionWindow = 100 + cfg.HistoryRetentionWindow = retentionWindow + cfg.ClassicFeeStatsLedgerRetentionWindow = retentionWindow + cfg.SorobanFeeStatsLedgerRetentionWindow = retentionWindow } - // add files to GCS - for seq := 
startSeq; seq <= endSeq; seq++ { + // Add ledger files to datastore + for seq := backfillStart; seq <= backfillEnd; seq++ { gcsServer.CreateObject(fakestorage.Object{ ObjectAttrs: fakestorage.ObjectAttrs{ BucketName: bucketName, - Name: schema.GetObjectKeyFromSequenceNumber(seq), + Name: objPrefix + "/" + schema.GetObjectKeyFromSequenceNumber(seq), // schema.GetObjectKeyFromSequenceNumber(seq), }, Content: createLCMBatchBuffer(seq), }) } - fmt.Printf("hello from before!!\n") - // Skip applying protocol limit upgrades to avoid requiring specific XDR files - // skipLimits := "" + // Start test with empty DB - captive core will start producing ledgers from checkpoint + noUpgrade := "" test := infrastructure.NewTest(t, &infrastructure.TestConfig{ DatastoreConfigFunc: makeDatastoreConfig, - NoParallel: true, + NoParallel: true, // can't use parallel due to env vars + DontWaitForRPC: true, + ApplyLimits: &noUpgrade, }) - fmt.Printf("hello after!!\n") - // Wait for backfill to complete - time.Sleep(20 * time.Second) - // Verify backfill worked by querying ledgers client := test.GetRPCLient() + + // Helper to wait for conditions + waitUntil := func(cond func(l protocol.GetLatestLedgerResponse) bool, timeout time.Duration) protocol.GetLatestLedgerResponse { + var last protocol.GetLatestLedgerResponse + require.Eventually(t, func() bool { + resp, err := client.GetLatestLedger(t.Context()) + require.NoError(t, err) + last = resp + return cond(resp) + }, timeout, 100*time.Millisecond, "last ledger backfilled: %+v", last.Sequence) + return last + } + + finalBackfilledLedger := waitUntil(func(l protocol.GetLatestLedgerResponse) bool { + return l.Sequence >= backfillEnd + }, 60*time.Second) + t.Logf("Successfully backfilled to ledger: %d", finalBackfilledLedger.Sequence) + result, err := client.GetLedgers(context.Background(), protocol.GetLedgersRequest{ - StartLedger: startSeq, + StartLedger: backfillStart, Pagination: &protocol.LedgerPaginationOptions{ - Limit: uint(endSeq - startSeq + 1), + Limit: uint(backfillEnd - backfillStart + 1), }, }) - b, _ := json.MarshalIndent(result, "", " ") - t.Logf("result:\n%s", string(b)) + require.NoError(t, err) - require.Len(t, result.Ledgers, int(endSeq-startSeq+1), - "expected to get %d contiguous ledgers from backfill", endSeq-startSeq+1) - // ensure contiguous + require.Len(t, result.Ledgers, int(backfillEnd-backfillStart+1), + "expected to get backfilled ledgers from local DB") + + // Verify they're contiguous for i, ledger := range result.Ledgers { - expectedSeq := startSeq + uint32(i) + expectedSeq := backfillStart + uint32(i) require.Equal(t, expectedSeq, ledger.Sequence, "gap detected at position %d: expected %d, got %d", i, expectedSeq, ledger.Sequence) } + + test.Shutdown() + + // TODO: seed more ledgers, restart WITHOUT backfill, verify database in good state and ingestion is normal + // this simulates post-ingestion backfill; backfill clears all caches and starts the rest of RPC + // but because backfill's latest ledger is always goign to be the highest one in GCS we must shutdown } diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index 90f0c64d..317394a6 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -87,6 +87,8 @@ type TestConfig struct { // empty string to skip upgrading altogether. 
ApplyLimits *string + DontWaitForRPC bool // skip waiting for RPC to be healthy + DatastoreConfigFunc func(*config.Config) } @@ -158,6 +160,9 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { parallel = !cfg.NoParallel i.datastoreConfigFunc = cfg.DatastoreConfigFunc + if cfg.DontWaitForRPC { + shouldWaitForRPC = false + } if cfg.OnlyRPC != nil { i.onlyRPC = true i.testPorts.TestCorePorts = cfg.OnlyRPC.CorePorts From 954b6efc11d8ef40863339c6b22084e95fb3d394 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 9 Jan 2026 12:44:47 -0500 Subject: [PATCH 32/72] added several integration tests --- .../internal/integrationtest/backfill_test.go | 214 ++++++++++++------ 1 file changed, 140 insertions(+), 74 deletions(-) diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index 03243633..2d009563 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -2,6 +2,7 @@ package integrationtest import ( "context" + "path" "testing" "time" @@ -9,25 +10,123 @@ import ( "github.com/stretchr/testify/require" "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + "github.com/stellar/go-stellar-sdk/network" protocol "github.com/stellar/go-stellar-sdk/protocols/rpc" "github.com/stellar/go-stellar-sdk/support/datastore" + supportlog "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/config" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/daemon/interfaces" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/db" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/integrationtest/infrastructure" ) func TestBackfillEmptyDB(t *testing.T) { - // Seed datastore with ledgers to backfill - var backfillStart, backfillEnd uint32 = 2, 64 + // GCS has ledgers from 2-192; history retention window is 128 + var localDbStart, localDbEnd uint32 = 0, 0 + testBackfillWithSeededDbLedgers(t, localDbStart, localDbEnd) +} + +// Backfill with some ledgers in middle of local DB (simulates quitting mid-backfill-backwards phase) +// This induces a backfill backwards from localStart-1 to (datastoreEnd - retentionWindow), +// then forwards from localEnd+1 to datastoreEnd +func TestBackfillLedgersInMiddleOfDB(t *testing.T) { + // GCS has ledgers from 2-192; history retention window is 128 + var localDbStart, localDbEnd uint32 = 50, 100 + testBackfillWithSeededDbLedgers(t, localDbStart, localDbEnd) +} + +// Backfill with some ledgers at start of DB (simulates pulling plug when backfilling forwards) +// This is a "only backfill forwards" scenario +func TestBackfillLedgersAtStartOfDB(t *testing.T) { + // GCS has ledgers from 2-192; history retention window is 128 + var localDbStart, localDbEnd uint32 = 2, 100 + testBackfillWithSeededDbLedgers(t, localDbStart, localDbEnd) +} + +func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint32) { + var ( + datastoreStart, datastoreEnd uint32 = 2, 192 + retentionWindow uint32 = 128 + ) + + gcsServer, makeDatastoreConfig := makeNewFakeGCSServer(t, datastoreStart, datastoreEnd, retentionWindow) + defer gcsServer.Stop() + + t.Setenv("ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING", "true") + + // Create temporary SQLite DB populated with dummy ledgers + var dbPath string + if localDbEnd != 0 { + tmp := t.TempDir() + dbPath = path.Join(tmp, "test.sqlite") + testDB := createDbWithLedgers(t, dbPath, 
localDbStart, localDbEnd) + defer testDB.Close() + } + + t.Logf("Seeded local DB with ledgers %d-%d", localDbStart, localDbEnd) + noUpgrade := "" + test := infrastructure.NewTest(t, &infrastructure.TestConfig{ + SQLitePath: dbPath, + DatastoreConfigFunc: makeDatastoreConfig, + NoParallel: true, // can't use parallel due to env vars + DontWaitForRPC: true, + ApplyLimits: &noUpgrade, + }) + + client := test.GetRPCLient() - // setup fake GCS server + // Helper to wait for conditions + waitUntil := func( + cond func(l protocol.GetLatestLedgerResponse) bool, + timeout time.Duration, + ) protocol.GetLatestLedgerResponse { + var last protocol.GetLatestLedgerResponse + require.Eventually(t, func() bool { + resp, err := client.GetLatestLedger(t.Context()) + require.NoError(t, err) + last = resp + return cond(resp) + }, timeout, 100*time.Millisecond, "last ledger backfilled: %+v", last.Sequence) + return last + } + + finalBackfilledLedger := waitUntil(func(l protocol.GetLatestLedgerResponse) bool { + return l.Sequence >= datastoreEnd + }, 60*time.Second) + t.Logf("Successfully backfilled to ledger: %d", finalBackfilledLedger.Sequence) + + result, err := client.GetLedgers(context.Background(), protocol.GetLedgersRequest{ + StartLedger: datastoreStart, + Pagination: &protocol.LedgerPaginationOptions{ + Limit: uint(datastoreEnd - datastoreStart + 1), + }, + }) + + require.NoError(t, err) + require.Len(t, result.Ledgers, int(datastoreEnd-datastoreStart+1), + "expected to get backfilled ledgers from local DB") + + // Verify they're contiguous + for i, ledger := range result.Ledgers { + expectedSeq := datastoreStart + uint32(i) + require.Equal(t, expectedSeq, ledger.Sequence, + "gap detected at position %d: expected %d, got %d", i, expectedSeq, ledger.Sequence) + } +} + +func makeNewFakeGCSServer(t *testing.T, + datastoreStart, + datastoreEnd, + retentionWindow uint32, +) (*fakestorage.Server, func(*config.Config)) { opts := fakestorage.Options{ Scheme: "http", PublicHost: "127.0.0.1", } gcsServer, err := fakestorage.NewServerWithOptions(opts) require.NoError(t, err, "failed to start fake GCS server") - defer gcsServer.Stop() bucketName := "test-bucket" t.Setenv("STORAGE_EMULATOR_HOST", gcsServer.URL()) @@ -38,33 +137,9 @@ func TestBackfillEmptyDB(t *testing.T) { schema := datastore.DataStoreSchema{ FilesPerPartition: 64000, LedgersPerFile: 1, - // FileExtension: "zst", // SDK adds .xdr automatically } - // // Create manifest file - // manifest := map[string]interface{}{ - // "version": "1.0", - // "ledgers_per_file": int(schema.LedgersPerFile), - // "files_per_partition": int(schema.FilesPerPartition), - // "file_extension": ".zst", - // "network_passphrase": "Test SDF Network ; September 2015", - // "compression": "zstd", - // } - // manifestBytes, err := json.Marshal(manifest) - // require.NoError(t, err) - - // t.Logf("Creating manifest: %s", string(manifestBytes)) - - // gcsServer.CreateObject(fakestorage.Object{ - // ObjectAttrs: fakestorage.ObjectAttrs{ - // BucketName: bucketName, - // Name: ".config.json", - // }, - // Content: manifestBytes, - // }) - // Configure with backfill enabled and retention window of 128 ledgers - retentionWindow := uint32(48) makeDatastoreConfig := func(cfg *config.Config) { cfg.ServeLedgersFromDatastore = true cfg.Backfill = true @@ -75,7 +150,7 @@ func TestBackfillEmptyDB(t *testing.T) { cfg.DataStoreConfig = datastore.DataStoreConfig{ Type: "GCS", Params: map[string]string{"destination_bucket_path": bucketPath}, - Schema: schema, // Provide schema in config + 
Schema: schema, } cfg.HistoryRetentionWindow = retentionWindow cfg.ClassicFeeStatsLedgerRetentionWindow = retentionWindow @@ -83,7 +158,7 @@ func TestBackfillEmptyDB(t *testing.T) { } // Add ledger files to datastore - for seq := backfillStart; seq <= backfillEnd; seq++ { + for seq := datastoreStart; seq <= datastoreEnd; seq++ { gcsServer.CreateObject(fakestorage.Object{ ObjectAttrs: fakestorage.ObjectAttrs{ BucketName: bucketName, @@ -93,55 +168,46 @@ func TestBackfillEmptyDB(t *testing.T) { }) } - // Start test with empty DB - captive core will start producing ledgers from checkpoint - noUpgrade := "" - test := infrastructure.NewTest(t, &infrastructure.TestConfig{ - DatastoreConfigFunc: makeDatastoreConfig, - NoParallel: true, // can't use parallel due to env vars - DontWaitForRPC: true, - ApplyLimits: &noUpgrade, - }) - - client := test.GetRPCLient() - - // Helper to wait for conditions - waitUntil := func(cond func(l protocol.GetLatestLedgerResponse) bool, timeout time.Duration) protocol.GetLatestLedgerResponse { - var last protocol.GetLatestLedgerResponse - require.Eventually(t, func() bool { - resp, err := client.GetLatestLedger(t.Context()) - require.NoError(t, err) - last = resp - return cond(resp) - }, timeout, 100*time.Millisecond, "last ledger backfilled: %+v", last.Sequence) - return last - } + return gcsServer, makeDatastoreConfig +} - finalBackfilledLedger := waitUntil(func(l protocol.GetLatestLedgerResponse) bool { - return l.Sequence >= backfillEnd - }, 60*time.Second) - t.Logf("Successfully backfilled to ledger: %d", finalBackfilledLedger.Sequence) +func createDbWithLedgers(t *testing.T, dbPath string, start, end uint32) *db.DB { + testDB, err := db.OpenSQLiteDB(dbPath) + require.NoError(t, err) + defer testDB.Close() - result, err := client.GetLedgers(context.Background(), protocol.GetLedgersRequest{ - StartLedger: backfillStart, - Pagination: &protocol.LedgerPaginationOptions{ - Limit: uint(backfillEnd - backfillStart + 1), - }, - }) + testLogger := supportlog.New() + rw := db.NewReadWriter(testLogger, testDB, interfaces.MakeNoOpDeamon(), 10, 10, + network.TestNetworkPassphrase) + // Insert dummy ledgers into the DB + writeTx, err := rw.NewTx(context.Background()) require.NoError(t, err) - require.Len(t, result.Ledgers, int(backfillEnd-backfillStart+1), - "expected to get backfilled ledgers from local DB") - // Verify they're contiguous - for i, ledger := range result.Ledgers { - expectedSeq := backfillStart + uint32(i) - require.Equal(t, expectedSeq, ledger.Sequence, - "gap detected at position %d: expected %d, got %d", i, expectedSeq, ledger.Sequence) + var lastLedger xdr.LedgerCloseMeta + for seq := start; seq <= end; seq++ { + ledger := createLedger(seq) + require.NoError(t, writeTx.LedgerWriter().InsertLedger(ledger)) + lastLedger = ledger } + require.NoError(t, writeTx.Commit(lastLedger, nil)) + return testDB +} - test.Shutdown() - - // TODO: seed more ledgers, restart WITHOUT backfill, verify database in good state and ingestion is normal - // this simulates post-ingestion backfill; backfill clears all caches and starts the rest of RPC - // but because backfill's latest ledger is always goign to be the highest one in GCS we must shutdown +func createLedger(ledgerSequence uint32) xdr.LedgerCloseMeta { + return xdr.LedgerCloseMeta{ + V: 1, + V1: &xdr.LedgerCloseMetaV1{ + LedgerHeader: xdr.LedgerHeaderHistoryEntry{ + Hash: xdr.Hash{}, + Header: xdr.LedgerHeader{ + LedgerSeq: xdr.Uint32(ledgerSequence), + }, + }, + TxSet: xdr.GeneralizedTransactionSet{ + V: 1, + 
V1TxSet: &xdr.TransactionSetV1{}, + }, + }, + } } From e4f0ca10ac58c566366aaef743282dac297b7af8 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 9 Jan 2026 12:58:39 -0500 Subject: [PATCH 33/72] fixed linter + error messages --- cmd/stellar-rpc/internal/ingest/backfill.go | 6 +++--- cmd/stellar-rpc/internal/ingest/backfill_test.go | 3 +-- .../internal/integrationtest/backfill_test.go | 11 ++++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 35bab458..eb142cc0 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -159,7 +159,7 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } if lBoundForwards < rBoundForwards { - rBoundForwards = rBoundForwards - (rBoundForwards % ledgersInCheckpoint) // Align to checkpoint + rBoundForwards -= (rBoundForwards % ledgersInCheckpoint) // Align to checkpoint backfill.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) if err = backfill.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { return errors.Wrap(err, "backfill forwards failed") @@ -178,8 +178,8 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { return errors.Wrap(err, "post-backfill verification failed") } if count+ledgerThreshold < nBackfill { - return errors.New(fmt.Sprintf("post-backfill verification failed: expected at least %d ledgers, "+ - "got %d ledgers (exceeds acceptable threshold of %d ledgers)", nBackfill, count, ledgerThreshold)) + return fmt.Errorf("post-backfill verification failed: expected at least %d ledgers, "+ + "got %d ledgers (exceeds acceptable threshold of %d ledgers)", nBackfill, count, ledgerThreshold) } backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", minSeq, maxSeq) return nil diff --git a/cmd/stellar-rpc/internal/ingest/backfill_test.go b/cmd/stellar-rpc/internal/ingest/backfill_test.go index 244f4ffb..cf148f69 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill_test.go +++ b/cmd/stellar-rpc/internal/ingest/backfill_test.go @@ -1,7 +1,6 @@ package ingest import ( - "context" "path" "testing" @@ -16,7 +15,7 @@ import ( ) func TestGapDetection(t *testing.T) { - ctx := context.Background() + ctx := t.Context() testLogger := supportlog.New() tmp := t.TempDir() diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index 2d009563..95b2101a 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -1,7 +1,6 @@ package integrationtest import ( - "context" "path" "testing" "time" @@ -63,9 +62,11 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint dbPath = path.Join(tmp, "test.sqlite") testDB := createDbWithLedgers(t, dbPath, localDbStart, localDbEnd) defer testDB.Close() + t.Logf("Seeded local DB with ledgers %d-%d", localDbStart, localDbEnd) + } else { + t.Logf("No local DB created or seeded, testing with no initial DB") } - t.Logf("Seeded local DB with ledgers %d-%d", localDbStart, localDbEnd) noUpgrade := "" test := infrastructure.NewTest(t, &infrastructure.TestConfig{ SQLitePath: dbPath, @@ -97,7 +98,7 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd 
uint }, 60*time.Second) t.Logf("Successfully backfilled to ledger: %d", finalBackfilledLedger.Sequence) - result, err := client.GetLedgers(context.Background(), protocol.GetLedgersRequest{ + result, err := client.GetLedgers(t.Context(), protocol.GetLedgersRequest{ StartLedger: datastoreStart, Pagination: &protocol.LedgerPaginationOptions{ Limit: uint(datastoreEnd - datastoreStart + 1), @@ -162,7 +163,7 @@ func makeNewFakeGCSServer(t *testing.T, gcsServer.CreateObject(fakestorage.Object{ ObjectAttrs: fakestorage.ObjectAttrs{ BucketName: bucketName, - Name: objPrefix + "/" + schema.GetObjectKeyFromSequenceNumber(seq), // schema.GetObjectKeyFromSequenceNumber(seq), + Name: objPrefix + "/" + schema.GetObjectKeyFromSequenceNumber(seq), }, Content: createLCMBatchBuffer(seq), }) @@ -181,7 +182,7 @@ func createDbWithLedgers(t *testing.T, dbPath string, start, end uint32) *db.DB network.TestNetworkPassphrase) // Insert dummy ledgers into the DB - writeTx, err := rw.NewTx(context.Background()) + writeTx, err := rw.NewTx(t.Context()) require.NoError(t, err) var lastLedger xdr.LedgerCloseMeta From 89a4edc73decd2d8541ae53dfdd4d4550b5f73da Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 9 Jan 2026 14:30:03 -0500 Subject: [PATCH 34/72] minor style improvements --- CHANGELOG.md | 3 ++- cmd/stellar-rpc/internal/ingest/backfill.go | 26 ++++++++------------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2976ef21..47a77e0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,9 +14,10 @@ go get -u github.com/stellar/go-stellar-sdk/protocols/rpc ``` ### Added +- Added `--backfill` configuration parameter providing synchronous backfilling of `history_archive_window` ledgers to the local DB prior to RPC starting ([#571](https://github.com/stellar/stellar-rpc/pull/571)). - Expanded `getLatestLedger` endpoint to also return `closeTime`, `headerXdr`, and `metadataXdr` ([#554](https://github.com/stellar/stellar-rpc/pull/554)). - Added `soroban-env-host` info to `version` command ([#550](https://github.com/stellar/stellar-rpc/pull/550)). -- Added a new `--network` configuration parameter, allowing users to specify a default Stellar network (`testnet`, `pubnet`, or `futurenet`) ([#540](https://github.com/stellar/stellar-rpc/pull/540), [#543](https://github.com/stellar/stellar-rpc/pull/543)). +- Added `--network` configuration parameter, allowing users to specify a default Stellar network (`testnet`, `pubnet`, or `futurenet`) ([#540](https://github.com/stellar/stellar-rpc/pull/540), [#543](https://github.com/stellar/stellar-rpc/pull/543)). - Simulation has been updated to support Protocol 25 ([#548](https://github.com/stellar/stellar-rpc/pull/548)). ### Fixed diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index eb142cc0..13f89627 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -114,26 +114,22 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { } else { backfill.logger.Infof("Local DB is empty, skipping precheck") } - backfill.logger.Infof("Precheck passed! 
Starting backfill backwards phase (phase 2 of 4)") - - // Phase 2: backfill backwards from minimum written ledger/current tip towards oldest ledger in retention window - var currentTipLedger uint32 - var err error + // Determine bounds for ledgers to be written to local DB in backwards and forwards phases + var ( + currentTipLedger uint32 + lBoundBackwards, rBoundBackwards uint32 // bounds for backwards backfill + lBoundForwards, rBoundForwards uint32 // bounds for forwards backfill + err error + ) if currentTipLedger, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } backfill.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) - - // Adjust nBackfill if datastore has fewer ledgers than the retention window if currentTipLedger < nBackfill { backfill.logger.Warnf("Datastore has fewer ledgers (%d) than retention window (%d); "+ "backfilling all available ledgers", currentTipLedger, nBackfill) nBackfill = currentTipLedger } - - // Bounds for ledgers to be written to local DB in backwards and forwards phases - var lBoundBackwards, rBoundBackwards uint32 - var lBoundForwards, rBoundForwards uint32 lBoundBackwards = max(currentTipLedger-nBackfill+1, backfill.dsInfo.minSeq) if backfill.dbInfo.isEmpty { rBoundBackwards = currentTipLedger @@ -142,6 +138,9 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { rBoundBackwards = backfill.dbInfo.minSeq - 1 lBoundForwards = backfill.dbInfo.maxSeq + 1 } + backfill.logger.Infof("Precheck and initialization passed! Starting backfill backwards phase (phase 2 of 4)") + + // Phase 2: backfill backwards from minimum written ledger/current tip towards oldest ledger in retention window if lBoundBackwards < rBoundBackwards { backfill.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", lBoundBackwards, rBoundBackwards) @@ -199,7 +198,6 @@ func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context) (uint32, uint backfill.logger.Infof("DB verify: checking for gaps in [%d, %d]", minDbSeq, maxDbSeq) expectedCount := maxDbSeq - minDbSeq + 1 - sequences, err := backfill.dbInfo.reader.GetLedgerSequencesInRange(ctx, minDbSeq, maxDbSeq) if err != nil { return 0, 0, errors.Wrap(err, "db verify: could not get ledger sequences in local DB") @@ -210,7 +208,6 @@ func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context) (uint32, uint return 0, 0, fmt.Errorf("db verify: gap detected in local DB: expected %d ledgers, got %d ledgers", expectedCount, len(sequences)) } - return sequencesMin, sequencesMax, nil } @@ -236,7 +233,6 @@ func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u lChunkBound = lBound } backfill.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := backfill.fillChunk(ctx, backfill.ingestService, tempBackend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } @@ -246,7 +242,6 @@ func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u if err := tempBackend.Close(); err != nil { backfill.logger.Warnf("error closing temporary backend: %v", err) } - if lChunkBound == lBound { break } @@ -276,7 +271,6 @@ func (backfill *BackfillMeta) runBackfillForwards(ctx context.Context, lBound ui rChunkBound := min(rBound, lChunkBound+ChunkSize-1) backfill.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", 
lChunkBound, rChunkBound) - if err := backfill.fillChunk(ctx, backfill.ingestService, backend, lChunkBound, rChunkBound); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } From efc3585fc776ffb7f90d4437dccf7dcb352124aa Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 9 Jan 2026 17:17:15 -0500 Subject: [PATCH 35/72] timing code added --- cmd/stellar-rpc/internal/daemon/daemon.go | 10 +++++++++- cmd/stellar-rpc/internal/ingest/backfill.go | 9 +++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index a9d9bb5a..603bfddd 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -173,7 +173,14 @@ func newCaptiveCore(cfg *config.Config, logger *supportlog.Entry) (*ledgerbacken } func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { + startupStart := time.Now() logger = setupLogger(cfg, logger) + defer func() { + dur := time.Since(startupStart) + logger.WithFields(supportlog.F{ + "duration_ms": dur.Milliseconds(), + }).Info("backfill_done") + }() core := mustCreateCaptiveCore(cfg, logger) historyArchive := mustCreateHistoryArchive(cfg, logger) metricsRegistry := prometheus.NewRegistry() @@ -205,6 +212,7 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { var ingestCfg ingest.Config daemon.ingestService, ingestCfg = createIngestService(cfg, logger, daemon, feewindows, historyArchive, rw) if cfg.Backfill { + backfillStart := time.Now() backfillMeta, err := ingest.NewBackfillMeta( logger, daemon.ingestService, @@ -215,13 +223,13 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { if err != nil { logger.WithError(err).Fatal("failed to create backfill metadata") } - if err := backfillMeta.RunBackfill(cfg); err != nil { logger.WithError(err).Fatal("failed to backfill ledgers") } // Clear the DB cache and fee windows so they re-populate from the database daemon.db.ResetCache() feewindows.Reset() + logger.Infof("Backfill completed in %s", time.Since(backfillStart)) } // Start ingestion service only after backfill is complete ingest.StartService(daemon.ingestService, ingestCfg) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 13f89627..629069fc 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -16,13 +16,10 @@ import ( ) const ( - // OneDayOfLedgers is (roughly) a 24 hour window of ledgers. 
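	// Rough arithmetic behind these constants, assuming the ~5-second target
	// close time that config.OneDayOfLedgers is derived from: one day is about
	// 86,400/5 = 17,280 ledgers, so the "12 hours/8640 ledgers" figure below is
	// half of that, and the 384-ledger threshold is six 64-ledger checkpoints,
	// i.e. roughly half an hour of ledgers.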
- OneDayOfLedgers = config.OneDayOfLedgers - SevenDayOfLedgers = config.OneDayOfLedgers * 7 - // Number of ledgers to read/write at a time during backfill + // Number of ledgers to read/write per commit during backfill // 12 hours/8640 ledgers on an M4 MacBook Pro, backfill takes: - // on pubnet: 5.5 minutes; on testnet: ~2.2 seconds - ChunkSize uint32 = OneDayOfLedgers / 2 + // on pubnet: ~11 minutes; on testnet: <5 seconds + ChunkSize uint32 = config.OneDayOfLedgers // Acceptable number of ledgers that may be missing from the backfill tail/head ledgerThreshold uint32 = 384 // six checkpoints/~30 minutes of ledgers ) From 183038280b8257115645b14f926b5769ba07b0c5 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 9 Jan 2026 17:30:26 -0500 Subject: [PATCH 36/72] added config for timing --- cmd/stellar-rpc/internal/ingest/backfill.go | 11 +- rpc-config-backfill-pubnet-testing.toml | 253 ++++++++++++++++++++ 2 files changed, 261 insertions(+), 3 deletions(-) create mode 100644 rpc-config-backfill-pubnet-testing.toml diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 629069fc..5030e957 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -102,7 +102,7 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { backfill.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") ledgersInCheckpoint := cfg.CheckpointFrequency nBackfill := cfg.HistoryRetentionWindow - + startP1 := time.Now() // Phase 1: precheck to ensure no pre-existing gaps in local DB if !backfill.dbInfo.isEmpty { if _, _, err := backfill.verifyDbGapless(ctx); err != nil { @@ -136,7 +136,8 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { lBoundForwards = backfill.dbInfo.maxSeq + 1 } backfill.logger.Infof("Precheck and initialization passed! Starting backfill backwards phase (phase 2 of 4)") - + backfill.logger.Infof("Initialization/precheck completed in %s", time.Since(startP1)) + startP2 := time.Now() // Phase 2: backfill backwards from minimum written ledger/current tip towards oldest ledger in retention window if lBoundBackwards < rBoundBackwards { backfill.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", @@ -148,7 +149,8 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { } else { backfill.logger.Infof("No backwards backfill needed, local DB tail already covers retention window") } - + backfill.logger.Infof("Backwards backfill completed in %s", time.Since(startP2)) + startP3 := time.Now() // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB backfill.logger.Infof("Backward backfill of old ledgers complete! 
Starting forward backfill (phase 3 of 4)") if rBoundForwards, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { @@ -165,7 +167,9 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { } // Log minimum written sequence after backwards backfill backfill.dbInfo.maxSeq = max(rBoundForwards, backfill.dbInfo.maxSeq) + backfill.logger.Infof("Forward backfill completed in %s", time.Since(startP3)) + startP4 := time.Now() // Phase 4: verify no gaps in local DB after backfill backfill.logger.Infof("Forward backfill complete, starting post-backfill verification") minSeq, maxSeq, err := backfill.verifyDbGapless(ctx) @@ -178,6 +182,7 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { "got %d ledgers (exceeds acceptable threshold of %d ledgers)", nBackfill, count, ledgerThreshold) } backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", minSeq, maxSeq) + backfill.logger.Infof("Post-backfill verification completed in %s", time.Since(startP4)) return nil } diff --git a/rpc-config-backfill-pubnet-testing.toml b/rpc-config-backfill-pubnet-testing.toml new file mode 100644 index 00000000..41f2be3d --- /dev/null +++ b/rpc-config-backfill-pubnet-testing.toml @@ -0,0 +1,253 @@ + +# Admin endpoint to listen and serve on. WARNING: this should not be accessible +# from the Internet and does not use TLS. "" (default) disables the admin server +# ADMIN_ENDPOINT = "" + +# path to additional configuration for the Stellar Core configuration file used +# by captive core. It must, at least, include enough details to define a quorum +# set +CAPTIVE_CORE_CONFIG_PATH = "/Users/christian/Desktop/stellar-go-sdk/ingest/ledgerbackend/configs/captive-core-pubnet.cfg" + +# Storage location for Captive Core bucket data +CAPTIVE_CORE_STORAGE_PATH = "/Users/christian/Desktop/stellar-rpc/storage" + +# establishes how many ledgers exist between checkpoints, do NOT change this +# unless you really know what you are doing +CHECKPOINT_FREQUENCY = 64 + +# configures classic fee stats retention window expressed in number of ledgers +CLASSIC_FEE_STATS_RETENTION_WINDOW = 10 + +# SQLite DB path +DB_PATH = "soroban_rpc_backfill_pubnet_test.sqlite" + +# Default cap on the amount of events included in a single getEvents response +DEFAULT_EVENTS_LIMIT = 100 + +# Default cap on the amount of ledgers included in a single getLedgers response +DEFAULT_LEDGERS_LIMIT = 50 + +# Default cap on the amount of transactions included in a single getTransactions +# response +DEFAULT_TRANSACTIONS_LIMIT = 50 + +# Endpoint to listen and serve on +ENDPOINT = "localhost:8000" + +# The friendbot URL to be returned by getNetwork endpoint +# FRIENDBOT_URL = "" + +# comma-separated list of stellar history archives to connect with +HISTORY_ARCHIVE_URLS = ["http://history.stellar.org/prd/core-live/core_live_001/", "http://history.stellar.org/prd/core-live/core_live_002/", "http://history.stellar.org/prd/core-live/core_live_003/"] + +# configures history retention window for transactions and events, expressed in +# number of ledgers, the default value is 120960 which corresponds to about 7 +# days of history +HISTORY_RETENTION_WINDOW = 120960 + +# Ingestion Timeout when bootstrapping data (checkpoint and in-memory +# initialization) and preparing ledger reads +INGESTION_TIMEOUT = "50m0s" + +# format used for output logs (json or text) +# LOG_FORMAT = "text" + +# minimum log severity (debug, info, warn, error) to log +LOG_LEVEL = "info" + +# Maximum amount of events allowed in a single 
getEvents response +MAX_EVENTS_LIMIT = 10000 + +# The maximum duration of time allowed for processing a getEvents request. When +# that time elapses, the rpc server would return -32001 and abort the request's +# execution +MAX_GET_EVENTS_EXECUTION_DURATION = "10s" + +# The maximum duration of time allowed for processing a getFeeStats request. +# When that time elapses, the rpc server would return -32001 and abort the +# request's execution +MAX_GET_FEE_STATS_EXECUTION_DURATION = "5s" + +# The maximum duration of time allowed for processing a getHealth request. When +# that time elapses, the rpc server would return -32001 and abort the request's +# execution +MAX_GET_HEALTH_EXECUTION_DURATION = "5s" + +# The maximum duration of time allowed for processing a getLatestLedger request. +# When that time elapses, the rpc server would return -32001 and abort the +# request's execution +MAX_GET_LATEST_LEDGER_EXECUTION_DURATION = "5s" + +# The maximum duration of time allowed for processing a getLedgers request. When +# that time elapses, the rpc server would return -32001 and abort the request's +# execution +MAX_GET_LEDGERS_EXECUTION_DURATION = "10s" + +# The maximum duration of time allowed for processing a getLedgerEntries +# request. When that time elapses, the rpc server would return -32001 and abort +# the request's execution +MAX_GET_LEDGER_ENTRIES_EXECUTION_DURATION = "5s" + +# The maximum duration of time allowed for processing a getNetwork request. When +# that time elapses, the rpc server would return -32001 and abort the request's +# execution +MAX_GET_NETWORK_EXECUTION_DURATION = "5s" + +# The maximum duration of time allowed for processing a getTransactions request. +# When that time elapses, the rpc server would return -32001 and abort the +# request's execution +MAX_GET_TRANSACTIONS_EXECUTION_DURATION = "5s" + +# The maximum duration of time allowed for processing a getTransaction request. +# When that time elapses, the rpc server would return -32001 and abort the +# request's execution +MAX_GET_TRANSACTION_EXECUTION_DURATION = "5s" + +# The maximum duration of time allowed for processing a getVersionInfo request. +# When that time elapses, the rpc server would return -32001 and abort the +# request's execution +MAX_GET_VERSION_INFO_EXECUTION_DURATION = "5s" + +# maximum ledger latency (i.e. time elapsed since the last known ledger closing +# time) considered to be healthy (used for the /health endpoint) +MAX_HEALTHY_LEDGER_LATENCY = "30s" + +# Maximum amount of ledgers allowed in a single getLedgers response +MAX_LEDGERS_LIMIT = 200 + +# The max request execution duration is the predefined maximum duration of time +# allowed for processing a request. When that time elapses, the server would +# return 504 and abort the request's execution +MAX_REQUEST_EXECUTION_DURATION = "25s" + +# The maximum duration of time allowed for processing a sendTransaction request. +# When that time elapses, the rpc server would return -32001 and abort the +# request's execution +MAX_SEND_TRANSACTION_EXECUTION_DURATION = "15s" + +# The maximum duration of time allowed for processing a simulateTransaction +# request. When that time elapses, the rpc server would return -32001 and abort +# the request's execution +MAX_SIMULATE_TRANSACTION_EXECUTION_DURATION = "15s" + +# Maximum amount of transactions allowed in a single getTransactions response +MAX_TRANSACTIONS_LIMIT = 200 + +# Network passphrase of the Stellar network transactions should be signed for. 
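# (The same passphrase also appears under [datastore_config] at the bottom of
# this file; the two are expected to describe the same network.)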
+# Commonly used values are "Test SDF Future Network ; October 2022", "Test SDF +# Network ; September 2015" and "Public Global Stellar Network ; September 2015" +NETWORK_PASSPHRASE = "Public Global Stellar Network ; September 2015" + +# NETWORK = "pubnet" + +# Enable debug information in preflighting (provides more detailed errors). It +# should not be enabled in production deployments. +PREFLIGHT_ENABLE_DEBUG = true + +# Number of workers (read goroutines) used to compute preflights for the +# simulateTransaction endpoint. Defaults to the number of CPUs. +PREFLIGHT_WORKER_COUNT = 12 + +# Maximum number of outstanding preflight requests for the simulateTransaction +# endpoint. Defaults to the number of CPUs. +PREFLIGHT_WORKER_QUEUE_SIZE = 12 + +# Maximum number of outstanding GetEvents requests +REQUEST_BACKLOG_GET_EVENTS_QUEUE_LIMIT = 1000 + +# Maximum number of outstanding GetFeeStats requests +REQUEST_BACKLOG_GET_FEE_STATS_QUEUE_LIMIT = 100 + +# Maximum number of outstanding GetHealth requests +REQUEST_BACKLOG_GET_HEALTH_QUEUE_LIMIT = 1000 + +# Maximum number of outstanding GetLatestsLedger requests +REQUEST_BACKLOG_GET_LATEST_LEDGER_QUEUE_LIMIT = 1000 + +# Maximum number of outstanding getLedgers requests +REQUEST_BACKLOG_GET_LEDGERS_QUEUE_LIMIT = 1000 + +# Maximum number of outstanding GetLedgerEntries requests +REQUEST_BACKLOG_GET_LEDGER_ENTRIES_QUEUE_LIMIT = 1000 + +# Maximum number of outstanding GetNetwork requests +REQUEST_BACKLOG_GET_NETWORK_QUEUE_LIMIT = 1000 + +# Maximum number of outstanding GetTransactions requests +REQUEST_BACKLOG_GET_TRANSACTIONS_QUEUE_LIMIT = 1000 + +# Maximum number of outstanding GetTransaction requests +REQUEST_BACKLOG_GET_TRANSACTION_QUEUE_LIMIT = 1000 + +# Maximum number of outstanding GetVersionInfo requests +REQUEST_BACKLOG_GET_VERSION_INFO_QUEUE_LIMIT = 1000 + +# Maximum number of outstanding requests +REQUEST_BACKLOG_GLOBAL_QUEUE_LIMIT = 5000 + +# Maximum number of outstanding SendTransaction requests +REQUEST_BACKLOG_SEND_TRANSACTION_QUEUE_LIMIT = 500 + +# Maximum number of outstanding SimulateTransaction requests +REQUEST_BACKLOG_SIMULATE_TRANSACTION_QUEUE_LIMIT = 100 + +# The request execution warning threshold is the predetermined maximum duration +# of time that a request can take to be processed before a warning would be +# generated +REQUEST_EXECUTION_WARNING_THRESHOLD = "5s" + +# Fetch historical ledgers from the datastore if they're not available locally. 
+SERVE_LEDGERS_FROM_DATASTORE = true + +# configures soroban inclusion fee stats retention window expressed in number of +# ledgers +SOROBAN_FEE_STATS_RETENTION_WINDOW = 50 + +# HTTP port for Captive Core to listen on (0 disables the HTTP server) +STELLAR_CAPTIVE_CORE_HTTP_PORT = 11626 + +# HTTP port for Captive Core to listen on for high-performance queries like +# /getledgerentry (must not conflict with CAPTIVE_CORE_HTTP_PORT) +STELLAR_CAPTIVE_CORE_HTTP_QUERY_PORT = 11628 + +# Size of ledger history in Captive Core's high-performance query server (don't +# touch unless you know what you are doing) +STELLAR_CAPTIVE_CORE_HTTP_QUERY_SNAPSHOT_LEDGERS = 4 + +# Number of threads to use by Captive Core's high-performance query server +STELLAR_CAPTIVE_CORE_HTTP_QUERY_THREAD_POOL_SIZE = 12 + +# path to stellar core binary +STELLAR_CORE_BINARY_PATH = "/usr/local/bin/stellar-core" + +# Timeout used when submitting requests to stellar-core +STELLAR_CORE_TIMEOUT = "2s" + +# URL used to query Stellar Core (local captive core by default) +# STELLAR_CORE_URL = "" + +# Enable strict toml configuration file parsing. This will prevent unknown +# fields in the config toml from being parsed. +# STRICT = false + +# Buffered storage backend configuration for reading ledgers from the datastore. +# [buffered_storage_backend_config] + # buffer_size = 100 + # num_workers = 10 + # retry_limit = 0 + # retry_wait = "0s" + +# External datastore configuration including type, bucket name and schema. +[datastore_config] + Compression = "zstd" + NetworkPassphrase = "Public Global Stellar Network ; September 2015" + type = "GCS" + + [datastore_config.params] + destination_bucket_path = "sdf-ledger-close-meta/v1/ledgers/pubnet" + + [datastore_config.schema] + FileExtension = "zst" + files_per_partition = 64000 + ledgers_per_file = 1 + From c330809cbefe437ced7fb3bea09e9cb7583ba0e2 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 9 Jan 2026 17:36:45 -0500 Subject: [PATCH 37/72] minor timerlog addition --- cmd/stellar-rpc/internal/daemon/daemon.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index 603bfddd..c3ca7afa 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -213,6 +213,8 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { daemon.ingestService, ingestCfg = createIngestService(cfg, logger, daemon, feewindows, historyArchive, rw) if cfg.Backfill { backfillStart := time.Now() + timerLog := logger.WithFields(supportlog.F{"backfill_ID": backfillStart}) + timerLog.Info("backfill_start") backfillMeta, err := ingest.NewBackfillMeta( logger, daemon.ingestService, @@ -230,6 +232,7 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { daemon.db.ResetCache() feewindows.Reset() logger.Infof("Backfill completed in %s", time.Since(backfillStart)) + timerLog.Info("backfill_done") } // Start ingestion service only after backfill is complete ingest.StartService(daemon.ingestService, ingestCfg) From 636223bdbda66dababaf027d1fd2fa824acbfb67 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 13 Jan 2026 18:26:03 -0500 Subject: [PATCH 38/72] repaired integration tests, changed backfill config params, added LCM to backfilled ledgers --- cmd/stellar-rpc/internal/ingest/backfill.go | 2 +- cmd/stellar-rpc/internal/ingest/service.go | 19 ++++++++++--------- .../internal/integrationtest/backfill_test.go | 4 ++-- 
.../infrastructure/docker/docker-compose.yml | 2 +- .../integrationtest/infrastructure/test.go | 3 +++ rpc-config-backfill-pubnet-testing.toml | 2 ++ 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 5030e957..f6e43a05 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -19,7 +19,7 @@ const ( // Number of ledgers to read/write per commit during backfill // 12 hours/8640 ledgers on an M4 MacBook Pro, backfill takes: // on pubnet: ~11 minutes; on testnet: <5 seconds - ChunkSize uint32 = config.OneDayOfLedgers + ChunkSize uint32 = config.OneDayOfLedgers / 18 // = 960 approx. 2Gb of RAM usage // Acceptable number of ledgers that may be missing from the backfill tail/head ledgerThreshold uint32 = 384 // six checkpoints/~30 minutes of ledgers ) diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index 68663c27..1cd17529 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -208,6 +208,15 @@ func (s *Service) ingest(ctx context.Context, sequence uint32) error { return err } + // Abstracted from ingestLedgerCloseMeta to allow fee window ingestion to be optional + startTime = time.Now() + if err := s.feeWindows.IngestFees(ledgerCloseMeta); err != nil { + return err + } + s.metrics.ingestionDurationMetric. + With(prometheus.Labels{"type": "fee-window"}). + Observe(time.Since(startTime).Seconds()) + durationMetrics := map[string]time.Duration{} if err := tx.Commit(ledgerCloseMeta, durationMetrics); err != nil { return err @@ -259,7 +268,7 @@ func (s *Service) ingestRange(ctx context.Context, backend backends.LedgerBacken if err != nil { return err } - if err := tx.LedgerWriter().InsertLedger(ledgerCloseMeta); err != nil { + if err := s.ingestLedgerCloseMeta(tx, ledgerCloseMeta); err != nil { return err } } @@ -313,13 +322,5 @@ func (s *Service) ingestLedgerCloseMeta(tx db.WriteTx, ledgerCloseMeta xdr.Ledge With(prometheus.Labels{"type": "events"}). Observe(time.Since(startTime).Seconds()) - startTime = time.Now() - if err := s.feeWindows.IngestFees(ledgerCloseMeta); err != nil { - return err - } - s.metrics.ingestionDurationMetric. - With(prometheus.Labels{"type": "fee-window"}). 
- Observe(time.Since(startTime).Seconds()) - return nil } diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index 95b2101a..10ba7bc3 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -46,8 +46,8 @@ func TestBackfillLedgersAtStartOfDB(t *testing.T) { func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint32) { var ( - datastoreStart, datastoreEnd uint32 = 2, 192 - retentionWindow uint32 = 128 + datastoreStart, datastoreEnd uint32 = 2, 64 + retentionWindow uint32 = 24 ) gcsServer, makeDatastoreConfig := makeNewFakeGCSServer(t, datastoreStart, datastoreEnd, retentionWindow) diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/docker/docker-compose.yml b/cmd/stellar-rpc/internal/integrationtest/infrastructure/docker/docker-compose.yml index 2c6dd5b1..80221bfc 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/docker/docker-compose.yml +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/docker/docker-compose.yml @@ -13,7 +13,7 @@ services: # Note: Please keep the image pinned to an immutable tag matching the Captive Core version. # This avoids implicit updates which break compatibility between # the Core container and captive core. - image: ${CORE_IMAGE:-stellar/stellar-core:23.0.1-2670.050eacf11.focal} + image: ${CORE_IMAGE:-stellar/stellar-core:25.0.0-2911.e9748b05a.jammy} depends_on: - core-postgres environment: diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index 317394a6..7c914e25 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -214,6 +214,9 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { i.waitForCheckpoint() } if !i.runRPCInContainer() { + // FUTURE CHRISTIAN: LOOK HERE + // change your config flag to guard this instead, wait until ledger using waitforledger() (you have to make that) + // and then start the whole daemon after it gets to a ledger beyond end of DB + 20 i.spawnRPCDaemon() } diff --git a/rpc-config-backfill-pubnet-testing.toml b/rpc-config-backfill-pubnet-testing.toml index 41f2be3d..482f6028 100644 --- a/rpc-config-backfill-pubnet-testing.toml +++ b/rpc-config-backfill-pubnet-testing.toml @@ -11,6 +11,8 @@ CAPTIVE_CORE_CONFIG_PATH = "/Users/christian/Desktop/stellar-go-sdk/ingest/ledge # Storage location for Captive Core bucket data CAPTIVE_CORE_STORAGE_PATH = "/Users/christian/Desktop/stellar-rpc/storage" +BACKFILL = true + # establishes how many ledgers exist between checkpoints, do NOT change this # unless you really know what you are doing CHECKPOINT_FREQUENCY = 64 From 621037ff4a79d3d94bbe32feea75130e5b02ac5e Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 14 Jan 2026 16:02:42 -0500 Subject: [PATCH 39/72] added stale DB edge case error message, integration tests now test live ingestion post-backfill --- cmd/stellar-rpc/internal/ingest/backfill.go | 5 +++ .../internal/integrationtest/backfill_test.go | 35 ++++++++++++++----- .../integrationtest/infrastructure/test.go | 25 ++++++++++--- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index f6e43a05..f5795c90 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ 
b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -132,6 +132,11 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { rBoundBackwards = currentTipLedger lBoundForwards = rBoundBackwards + 1 } else { + if currentTipLedger < backfill.dbInfo.minSeq { + // If we attempt to backfill from lBoundBackwards to currentTipLedger in this case, + // we introduce a gap missing ledgers of sequences (currentTipLedger, backfill.dbInfo.minSeq-1) + return errors.New("datastore stale: current tip is older than local DB minimum ledger") + } rBoundBackwards = backfill.dbInfo.minSeq - 1 lBoundForwards = backfill.dbInfo.maxSeq + 1 } diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index 10ba7bc3..7adaafe8 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -46,7 +46,7 @@ func TestBackfillLedgersAtStartOfDB(t *testing.T) { func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint32) { var ( - datastoreStart, datastoreEnd uint32 = 2, 64 + datastoreStart, datastoreEnd uint32 = 2, 38 retentionWindow uint32 = 24 ) @@ -54,6 +54,7 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint defer gcsServer.Stop() t.Setenv("ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING", "true") + t.Setenv("STELLAR_RPC_INTEGRATION_TESTS_CAPTIVE_CORE_BIN", "/usr/local/bin/stellar-core") // Create temporary SQLite DB populated with dummy ledgers var dbPath string @@ -69,17 +70,17 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint noUpgrade := "" test := infrastructure.NewTest(t, &infrastructure.TestConfig{ - SQLitePath: dbPath, - DatastoreConfigFunc: makeDatastoreConfig, - NoParallel: true, // can't use parallel due to env vars - DontWaitForRPC: true, - ApplyLimits: &noUpgrade, + SQLitePath: dbPath, + DatastoreConfigFunc: makeDatastoreConfig, + NoParallel: true, // can't use parallel due to env vars + DelayDaemonForLedgerN: int(datastoreEnd) + 1, // stops daemon start until core has at least the datastore ledgers + ApplyLimits: &noUpgrade, }) client := test.GetRPCLient() // Helper to wait for conditions - waitUntil := func( + waitUntilBackfilled := func( cond func(l protocol.GetLatestLedgerResponse) bool, timeout time.Duration, ) protocol.GetLatestLedgerResponse { @@ -93,11 +94,27 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint return last } - finalBackfilledLedger := waitUntil(func(l protocol.GetLatestLedgerResponse) bool { - return l.Sequence >= datastoreEnd + finalBackfilledLedger := waitUntilBackfilled(func(l protocol.GetLatestLedgerResponse) bool { + return l.Sequence >= datastoreEnd+1 }, 60*time.Second) t.Logf("Successfully backfilled to ledger: %d", finalBackfilledLedger.Sequence) + waitUntilHealthy := func(cond func(h protocol.GetHealthResponse) bool, timeout time.Duration) protocol.GetHealthResponse { + var last protocol.GetHealthResponse + require.Eventually(t, func() bool { + resp, err := client.GetHealth(t.Context()) + require.NoError(t, err) + last = resp + return cond(resp) + }, timeout, 100*time.Millisecond, "last health: %+v", last) + return last + } + waitUntilHealthy(func(h protocol.GetHealthResponse) bool { + return h.Status == "healthy" && h.OldestLedger <= datastoreStart && h.LatestLedger >= datastoreEnd+1 + }, 30*time.Second) + t.Logf("DB now ingesting from core: health check shows healthy, oldest sequence %d, latest sequence 
%d", + datastoreStart, datastoreEnd+1) + result, err := client.GetLedgers(t.Context(), protocol.GetLedgersRequest{ StartLedger: datastoreStart, Pagination: &protocol.LedgerPaginationOptions{ diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index 7c914e25..a209e080 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -87,7 +87,7 @@ type TestConfig struct { // empty string to skip upgrading altogether. ApplyLimits *string - DontWaitForRPC bool // skip waiting for RPC to be healthy + DelayDaemonForLedgerN int // don't start daemon until ledger N reached by core DatastoreConfigFunc func(*config.Config) } @@ -160,9 +160,9 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { parallel = !cfg.NoParallel i.datastoreConfigFunc = cfg.DatastoreConfigFunc - if cfg.DontWaitForRPC { - shouldWaitForRPC = false - } + // if cfg.DontWaitForRPC { + // shouldWaitForRPC = false + // } if cfg.OnlyRPC != nil { i.onlyRPC = true i.testPorts.TestCorePorts = cfg.OnlyRPC.CorePorts @@ -217,6 +217,10 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { // FUTURE CHRISTIAN: LOOK HERE // change your config flag to guard this instead, wait until ledger using waitforledger() (you have to make that) // and then start the whole daemon after it gets to a ledger beyond end of DB + 20 + if cfg.DelayDaemonForLedgerN != 0 { + i.t.Logf("Delaying daemon start until core reaches ledger %d", cfg.DelayDaemonForLedgerN) + i.waitForLedger(cfg.DelayDaemonForLedgerN) + } i.spawnRPCDaemon() } @@ -316,6 +320,19 @@ func (i *Test) waitForCheckpoint() { ) } +func (i *Test) waitForLedger(ledger int) { + i.t.Logf("Waiting for ledger %d...", ledger) + require.Eventually(i.t, + func() bool { + info, err := i.getCoreInfo() + i.t.Logf("debug: reached ledger %d...", info.Info.Ledger.Num) + return err == nil && info.Info.Ledger.Num >= ledger + }, + 90*time.Second, + time.Second, + ) +} + func (i *Test) getRPConfigForContainer() rpcConfig { return rpcConfig{ // The container needs to listen on all interfaces, not just localhost From 7f69666dfe923ce638b1a4639d7341292be4ab7c Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 16 Jan 2026 11:47:21 -0500 Subject: [PATCH 40/72] integration test debugging --- cmd/stellar-rpc/internal/config/main.go | 1 + cmd/stellar-rpc/internal/config/options.go | 5 + cmd/stellar-rpc/internal/daemon/daemon.go | 8 + cmd/stellar-rpc/internal/ingest/backfill.go | 26 ++- cmd/stellar-rpc/internal/ingest/service.go | 1 - .../internal/integrationtest/backfill_test.go | 171 +++++++++++------- .../integrationtest/get_ledgers_test.go | 8 +- .../integrationtest/infrastructure/test.go | 41 +++-- 8 files changed, 165 insertions(+), 96 deletions(-) diff --git a/cmd/stellar-rpc/internal/config/main.go b/cmd/stellar-rpc/internal/config/main.go index c74d92ad..776c3e72 100644 --- a/cmd/stellar-rpc/internal/config/main.go +++ b/cmd/stellar-rpc/internal/config/main.go @@ -29,6 +29,7 @@ type Config struct { Endpoint string AdminEndpoint string + BackfillTimeout time.Duration CheckpointFrequency uint32 CoreRequestTimeout time.Duration DefaultEventsLimit uint diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index d07bb3ac..55e5472a 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -95,6 +95,11 @@ func (cfg *Config) options() Options { return 
nil }, }, + { + Name: "backfill-timeout", + Usage: "Timeout for backfill database", + ConfigKey: &cfg.BackfillTimeout, + }, { Name: "stellar-core-timeout", Usage: "Timeout used when submitting requests to stellar-core", diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index c3ca7afa..e3bd3d1b 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -85,6 +85,14 @@ func (d *Daemon) GetEndpointAddrs() (net.TCPAddr, *net.TCPAddr) { return *addr, adminAddr } +func (d *Daemon) GetIngestService() *ingest.Service { + return d.ingestService +} + +func (d *Daemon) StopIngestion() error { + return d.ingestService.Close() +} + func (d *Daemon) close() { shutdownCtx, shutdownRelease := context.WithTimeout(context.Background(), defaultShutdownGracePeriod) defer shutdownRelease() diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index f5795c90..15881cb0 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -17,9 +17,7 @@ import ( const ( // Number of ledgers to read/write per commit during backfill - // 12 hours/8640 ledgers on an M4 MacBook Pro, backfill takes: - // on pubnet: ~11 minutes; on testnet: <5 seconds - ChunkSize uint32 = config.OneDayOfLedgers / 18 // = 960 approx. 2Gb of RAM usage + ChunkSize uint32 = config.OneDayOfLedgers / 18 // = 960 ledgers, approx. 2Gb of RAM usage // Acceptable number of ledgers that may be missing from the backfill tail/head ledgerThreshold uint32 = 384 // six checkpoints/~30 minutes of ledgers ) @@ -43,7 +41,6 @@ type datastoreInfo struct { // This struct holds the local database read/write constructs and metadata initially associated with it type databaseInfo struct { - rw db.ReadWriter reader db.LedgerReader minSeq uint32 maxSeq uint32 @@ -58,7 +55,7 @@ func NewBackfillMeta( ds datastore.DataStore, dsSchema datastore.DataStoreSchema, ) (BackfillMeta, error) { - ctx, cancelInit := context.WithTimeout(context.Background(), 5*time.Second) + ctx, cancelInit := context.WithTimeout(context.Background(), time.Minute) defer cancelInit() // Query local DB to determine min and max sequence numbers among the written ledgers @@ -96,12 +93,15 @@ func NewBackfillMeta( // It guarantees the backfill of the most recent cfg.HistoryRetentionWindow ledgers // Requires that no sequence number gaps exist in the local DB prior to backfilling func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { - ctx, cancelBackfill := context.WithTimeout(context.Background(), 4*time.Hour) // TODO: determine backfill timeout + nBackfill := cfg.HistoryRetentionWindow + if cfg.BackfillTimeout == 0 { + cfg.BackfillTimeout = time.Duration(nBackfill/config.OneDayOfLedgers) * time.Hour + } + ctx, cancelBackfill := context.WithTimeout(context.Background(), cfg.BackfillTimeout) defer cancelBackfill() backfill.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") ledgersInCheckpoint := cfg.CheckpointFrequency - nBackfill := cfg.HistoryRetentionWindow startP1 := time.Now() // Phase 1: precheck to ensure no pre-existing gaps in local DB if !backfill.dbInfo.isEmpty { @@ -262,6 +262,7 @@ func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u func (backfill *BackfillMeta) runBackfillForwards(ctx context.Context, lBound uint32, rBound uint32) error { // Backend for forwards backfill can be persistent over multiple chunks 
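	// (The backwards phase above, by contrast, tears its temporary backend
	// down after every chunk, since each of its chunks starts at a lower
	// sequence than the one before it.) The loop below is essentially:
	//
	//	for lo := lBound; lo <= rBound; lo += ChunkSize {
	//		hi := min(rBound, lo+ChunkSize-1)
	//		// read ledgers [lo, hi] from the backend and commit them to the local DB
	//	}
	//
	// where lo and hi are illustrative names, not the identifiers used below.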
backend, err := makeBackend(backfill.dsInfo) + // for testing: prepareRange on entire range, then normal write/commit if err != nil { return errors.Wrap(err, "could not create ledger backend") } @@ -302,8 +303,8 @@ func (backfill *BackfillMeta) fillChunk( func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { backend, err := ledgerbackend.NewBufferedStorageBackend( ledgerbackend.BufferedStorageBackendConfig{ - BufferSize: 1024, - NumWorkers: 1000, + BufferSize: 1024, // buffer is in number of FILES + NumWorkers: 100, RetryLimit: 3, RetryWait: 5 * time.Second, }, @@ -313,6 +314,13 @@ func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { return backend, err } +// Karthik GCS Configuration +// GCSBucketPath = "sdf-ledger-close-meta/v1/ledgers/pubnet" +// GCSBufferSize = 10000 +// GCSNumWorkers = 200 +// GCSRetryLimit = 3 +// GCSRetryWait = 5 * time.Second + // Gets the latest ledger number stored in the cloud Datastore/datalake // Stores it in tip pointer func getLatestSeqInCDP(callerCtx context.Context, ds datastore.DataStore) (uint32, error) { diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index 1cd17529..d58efa38 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -41,7 +41,6 @@ type Config struct { func NewService(cfg Config) *Service { service := newService(cfg) - // StartService(service, cfg) return service } diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index 7adaafe8..d6e8f0db 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -1,6 +1,7 @@ package integrationtest import ( + "fmt" "path" "testing" "time" @@ -21,6 +22,9 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/integrationtest/infrastructure" ) +// Captive core will decide its own close times, so track and set accurate close times for artificial ledgers +var seqToCloseTime = map[uint32]xdr.TimePoint{} //nolint:gochecknoglobals + func TestBackfillEmptyDB(t *testing.T) { // GCS has ledgers from 2-192; history retention window is 128 var localDbStart, localDbEnd uint32 = 0, 0 @@ -31,113 +35,129 @@ func TestBackfillEmptyDB(t *testing.T) { // This induces a backfill backwards from localStart-1 to (datastoreEnd - retentionWindow), // then forwards from localEnd+1 to datastoreEnd func TestBackfillLedgersInMiddleOfDB(t *testing.T) { - // GCS has ledgers from 2-192; history retention window is 128 - var localDbStart, localDbEnd uint32 = 50, 100 + // GCS has ledgers from 2-38; history retention window is 24 + var localDbStart, localDbEnd uint32 = 24, 30 testBackfillWithSeededDbLedgers(t, localDbStart, localDbEnd) } // Backfill with some ledgers at start of DB (simulates pulling plug when backfilling forwards) // This is a "only backfill forwards" scenario func TestBackfillLedgersAtStartOfDB(t *testing.T) { - // GCS has ledgers from 2-192; history retention window is 128 - var localDbStart, localDbEnd uint32 = 2, 100 + // GCS has ledgers from 2-38; history retention window is 24 + var localDbStart, localDbEnd uint32 = 2, 28 testBackfillWithSeededDbLedgers(t, localDbStart, localDbEnd) } func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint32) { var ( datastoreStart, datastoreEnd uint32 = 2, 38 - retentionWindow uint32 = 24 + retentionWindow uint32 = 64 // wait for ledger 
66, verify [2,64] ingested + checkpointFrequency int = 64 + stopLedger int = checkpointFrequency + 2 ) - gcsServer, makeDatastoreConfig := makeNewFakeGCSServer(t, datastoreStart, datastoreEnd, retentionWindow) - defer gcsServer.Stop() - - t.Setenv("ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING", "true") + // t.Setenv("ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING", "true") t.Setenv("STELLAR_RPC_INTEGRATION_TESTS_CAPTIVE_CORE_BIN", "/usr/local/bin/stellar-core") + t.Setenv("BACKFILL_TIMEOUT", "2m") + + gcsServer, makeDatastoreConfig := makeNewFakeGCSServer(t, datastoreStart, datastoreEnd, retentionWindow, int64(stopLedger/2)) + defer gcsServer.Stop() // Create temporary SQLite DB populated with dummy ledgers var dbPath string + tmp := t.TempDir() + dbPath = path.Join(tmp, "test.sqlite") + testDB := createDbWithLedgers(t, dbPath, localDbStart, localDbEnd, retentionWindow) + testDB.Close() if localDbEnd != 0 { - tmp := t.TempDir() - dbPath = path.Join(tmp, "test.sqlite") - testDB := createDbWithLedgers(t, dbPath, localDbStart, localDbEnd) - defer testDB.Close() - t.Logf("Seeded local DB with ledgers %d-%d", localDbStart, localDbEnd) + t.Logf("Created local DB, seeded with ledgers %d-%d", localDbStart, localDbEnd) } else { - t.Logf("No local DB created or seeded, testing with no initial DB") + t.Logf("Created empty local DB") } - noUpgrade := "" + // noUpgrade := "" test := infrastructure.NewTest(t, &infrastructure.TestConfig{ SQLitePath: dbPath, DatastoreConfigFunc: makeDatastoreConfig, - NoParallel: true, // can't use parallel due to env vars - DelayDaemonForLedgerN: int(datastoreEnd) + 1, // stops daemon start until core has at least the datastore ledgers - ApplyLimits: &noUpgrade, + NoParallel: true, // can't use parallel due to env vars + DelayDaemonForLedgerN: stopLedger, // stops daemon start until core has at least the datastore ledgers + // ApplyLimits: &noUpgrade, // Check that it ingests all ledgers instead of health + // DontWaitForRPC: true, }) client := test.GetRPCLient() - // Helper to wait for conditions - waitUntilBackfilled := func( + // Helper to wait for ledger + waitUntilLedger := func( cond func(l protocol.GetLatestLedgerResponse) bool, timeout time.Duration, + cancelIngest bool, ) protocol.GetLatestLedgerResponse { var last protocol.GetLatestLedgerResponse require.Eventually(t, func() bool { resp, err := client.GetLatestLedger(t.Context()) require.NoError(t, err) last = resp + if cancelIngest && cond(resp) { + test.StopCore() + } return cond(resp) }, timeout, 100*time.Millisecond, "last ledger backfilled: %+v", last.Sequence) + if last.Sequence > 0 { + if _, ok := seqToCloseTime[last.Sequence]; !ok { + seqToCloseTime[last.Sequence] = xdr.TimePoint(last.LedgerCloseTime) + } + } return last } - finalBackfilledLedger := waitUntilBackfilled(func(l protocol.GetLatestLedgerResponse) bool { - return l.Sequence >= datastoreEnd+1 - }, 60*time.Second) - t.Logf("Successfully backfilled to ledger: %d", finalBackfilledLedger.Sequence) + backfillComplete := waitUntilLedger(func(l protocol.GetLatestLedgerResponse) bool { + return l.Sequence >= datastoreEnd + }, 60*time.Second, false) + t.Logf("Successfully backfilled, ledger %d fetched from DB", backfillComplete.Sequence) - waitUntilHealthy := func(cond func(h protocol.GetHealthResponse) bool, timeout time.Duration) protocol.GetHealthResponse { - var last protocol.GetHealthResponse - require.Eventually(t, func() bool { - resp, err := client.GetHealth(t.Context()) - require.NoError(t, err) - last = resp - return cond(resp) - }, timeout, 
100*time.Millisecond, "last health: %+v", last) - return last - } - waitUntilHealthy(func(h protocol.GetHealthResponse) bool { - return h.Status == "healthy" && h.OldestLedger <= datastoreStart && h.LatestLedger >= datastoreEnd+1 - }, 30*time.Second) - t.Logf("DB now ingesting from core: health check shows healthy, oldest sequence %d, latest sequence %d", - datastoreStart, datastoreEnd+1) - - result, err := client.GetLedgers(t.Context(), protocol.GetLedgersRequest{ - StartLedger: datastoreStart, - Pagination: &protocol.LedgerPaginationOptions{ - Limit: uint(datastoreEnd - datastoreStart + 1), - }, - }) + coreIngestionComplete := waitUntilLedger(func(l protocol.GetLatestLedgerResponse) bool { + return l.Sequence >= uint32(stopLedger) + }, 60*time.Second, true) + t.Logf("Core ingestion complete, ledger %d fetched from captive core", coreIngestionComplete.Sequence) + // Stop ingestion to prevent further ledgers from being ingested + // test.GetDaemon().StopIngestion() + reader := db.NewLedgerReader(testDB) + ledgers, err := reader.GetLedgerSequencesInRange(t.Context(), datastoreStart, uint32(stopLedger)) require.NoError(t, err) - require.Len(t, result.Ledgers, int(datastoreEnd-datastoreStart+1), - "expected to get backfilled ledgers from local DB") - - // Verify they're contiguous - for i, ledger := range result.Ledgers { - expectedSeq := datastoreStart + uint32(i) - require.Equal(t, expectedSeq, ledger.Sequence, - "gap detected at position %d: expected %d, got %d", i, expectedSeq, ledger.Sequence) - } + len := uint32(len(ledgers)) + require.LessOrEqual(t, ledgers[0], datastoreEnd, "did not ingest ledgers from datastore: "+ + fmt.Sprintf("expected first ledger <= %d, got %d", datastoreEnd, ledgers[len-1])) + require.Greater(t, ledgers[len-1], datastoreEnd, "did not ingest ledgers from core after backfill: "+ + fmt.Sprintf("expected last ledger > %d, got %d", datastoreEnd, ledgers[len-1])) + t.Logf("Verified ledgers %d-%d present in local DB", ledgers[0], ledgers[len-1]) + // result, err := client.GetLedgers(t.Context(), protocol.GetLedgersRequest{ + // StartLedger: 2, + // Pagination: &protocol.LedgerPaginationOptions{ + // Limit: uint(retentionWindow), + // }, + // }) + + // We cannot use GetLedgers as it will fall back to the datastore, which is cheating + + // require.NoError(t, err) + // require.Len(t, result.Ledgers, int(retentionWindow), + // "expected to get backfilled ledgers from local DB") + + // // Verify they're contiguous + // for i, ledger := range result.Ledgers { + // expectedSeq := datastoreStart + uint32(i) + // require.Equal(t, expectedSeq, ledger.Sequence, + // "gap detected at position %d: expected %d, got %d", i, expectedSeq, ledger.Sequence) + // } } func makeNewFakeGCSServer(t *testing.T, datastoreStart, datastoreEnd, retentionWindow uint32, + timeOffset int64, ) (*fakestorage.Server, func(*config.Config)) { opts := fakestorage.Options{ Scheme: "http", @@ -182,20 +202,20 @@ func makeNewFakeGCSServer(t *testing.T, BucketName: bucketName, Name: objPrefix + "/" + schema.GetObjectKeyFromSequenceNumber(seq), }, - Content: createLCMBatchBuffer(seq), + Content: createLCMBatchBuffer(seq, xdr.TimePoint(time.Now().Unix()+int64(timeOffset))), }) } return gcsServer, makeDatastoreConfig } -func createDbWithLedgers(t *testing.T, dbPath string, start, end uint32) *db.DB { +func createDbWithLedgers(t *testing.T, dbPath string, start, end, retentionWindow uint32) *db.DB { testDB, err := db.OpenSQLiteDB(dbPath) require.NoError(t, err) - defer testDB.Close() + // defer testDB.Close() 
testLogger := supportlog.New() - rw := db.NewReadWriter(testLogger, testDB, interfaces.MakeNoOpDeamon(), 10, 10, + rw := db.NewReadWriter(testLogger, testDB, interfaces.MakeNoOpDeamon(), int(retentionWindow), retentionWindow, network.TestNetworkPassphrase) // Insert dummy ledgers into the DB @@ -203,24 +223,25 @@ func createDbWithLedgers(t *testing.T, dbPath string, start, end uint32) *db.DB require.NoError(t, err) var lastLedger xdr.LedgerCloseMeta - for seq := start; seq <= end; seq++ { - ledger := createLedger(seq) - require.NoError(t, writeTx.LedgerWriter().InsertLedger(ledger)) - lastLedger = ledger + if end != 0 { + for seq := start; seq <= end; seq++ { + ledger := createLedger(seq) + require.NoError(t, writeTx.LedgerWriter().InsertLedger(ledger)) + lastLedger = ledger + } + require.NoError(t, writeTx.Commit(lastLedger, nil)) } - require.NoError(t, writeTx.Commit(lastLedger, nil)) return testDB } func createLedger(ledgerSequence uint32) xdr.LedgerCloseMeta { + now := time.Now().Unix() return xdr.LedgerCloseMeta{ V: 1, V1: &xdr.LedgerCloseMetaV1{ LedgerHeader: xdr.LedgerHeaderHistoryEntry{ - Hash: xdr.Hash{}, - Header: xdr.LedgerHeader{ - LedgerSeq: xdr.Uint32(ledgerSequence), - }, + Hash: xdr.Hash{}, + Header: makeLedgerHeader(ledgerSequence, 25, xdr.TimePoint(now)), }, TxSet: xdr.GeneralizedTransactionSet{ V: 1, @@ -229,3 +250,15 @@ func createLedger(ledgerSequence uint32) xdr.LedgerCloseMeta { }, } } + +func makeLedgerHeader(ledgerSequence uint32, protocolVersion uint32, closeTime xdr.TimePoint) xdr.LedgerHeader { + return xdr.LedgerHeader{ + LedgerSeq: xdr.Uint32(ledgerSequence), + LedgerVersion: xdr.Uint32(protocolVersion), + ScpValue: xdr.StellarValue{ + CloseTime: closeTime, + TxSetHash: xdr.Hash{}, + Upgrades: nil, + }, + } +} diff --git a/cmd/stellar-rpc/internal/integrationtest/get_ledgers_test.go b/cmd/stellar-rpc/internal/integrationtest/get_ledgers_test.go index 9258fab2..decb5eda 100644 --- a/cmd/stellar-rpc/internal/integrationtest/get_ledgers_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/get_ledgers_test.go @@ -146,7 +146,7 @@ func TestGetLedgersFromDatastore(t *testing.T) { BucketName: bucketName, Name: schema.GetObjectKeyFromSequenceNumber(seq), }, - Content: createLCMBatchBuffer(seq), + Content: createLCMBatchBuffer(seq, xdr.TimePoint(0)), }) } @@ -250,7 +250,7 @@ func TestGetLedgersFromDatastore(t *testing.T) { }) } -func createLCMBatchBuffer(seq uint32) []byte { +func createLCMBatchBuffer(seq uint32, closeTime xdr.TimePoint) []byte { lcm := xdr.LedgerCloseMetaBatch{ StartSequence: xdr.Uint32(seq), EndSequence: xdr.Uint32(seq), @@ -259,9 +259,7 @@ func createLCMBatchBuffer(seq uint32) []byte { V: int32(0), V0: &xdr.LedgerCloseMetaV0{ LedgerHeader: xdr.LedgerHeaderHistoryEntry{ - Header: xdr.LedgerHeader{ - LedgerSeq: xdr.Uint32(seq), - }, + Header: makeLedgerHeader(seq, 25, closeTime), }, }, }, diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index a209e080..33b9906c 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -87,7 +87,8 @@ type TestConfig struct { // empty string to skip upgrading altogether. 
ApplyLimits *string - DelayDaemonForLedgerN int // don't start daemon until ledger N reached by core + DontWaitForRPC bool // don't wait for RPC to be healthy + DelayDaemonForLedgerN int // don't start daemon until ledger N reached by core DatastoreConfigFunc func(*config.Config) } @@ -160,9 +161,9 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { parallel = !cfg.NoParallel i.datastoreConfigFunc = cfg.DatastoreConfigFunc - // if cfg.DontWaitForRPC { - // shouldWaitForRPC = false - // } + if cfg.DontWaitForRPC { + shouldWaitForRPC = false + } if cfg.OnlyRPC != nil { i.onlyRPC = true i.testPorts.TestCorePorts = cfg.OnlyRPC.CorePorts @@ -214,12 +215,21 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { i.waitForCheckpoint() } if !i.runRPCInContainer() { - // FUTURE CHRISTIAN: LOOK HERE - // change your config flag to guard this instead, wait until ledger using waitforledger() (you have to make that) - // and then start the whole daemon after it gets to a ledger beyond end of DB + 20 if cfg.DelayDaemonForLedgerN != 0 { i.t.Logf("Delaying daemon start until core reaches ledger %d", cfg.DelayDaemonForLedgerN) i.waitForLedger(cfg.DelayDaemonForLedgerN) + + // cancelIngest := func() { + // for { + // info, err := i.getCoreInfo() + // if err != nil && i.daemon != nil && info.Info.Ledger.Num >= cfg.DelayDaemonForLedgerN && i.daemon.GetIngestService() != nil { + // i.daemon.StopIngestion() + // break + // } + // time.Sleep(50 * time.Millisecond) + // } + // } + // go cancelIngest() } i.spawnRPCDaemon() } @@ -264,6 +274,13 @@ func (i *Test) spawnContainers() { i.fillContainerPorts() } +func (i *Test) StopCore() { + if !i.onlyRPC && i.areThereContainers() { + i.runSuccessfulComposeCommand("stop", "core") + i.t.Log("Stopped Core container") + } +} + func (i *Test) stopContainers() { // There were containerized workloads we should bring down downCmd := []string{"down"} @@ -325,7 +342,6 @@ func (i *Test) waitForLedger(ledger int) { require.Eventually(i.t, func() bool { info, err := i.getCoreInfo() - i.t.Logf("debug: reached ledger %d...", info.Info.Ledger.Num) return err == nil && info.Info.Ledger.Num >= ledger }, 90*time.Second, @@ -413,16 +429,17 @@ func (vars rpcConfig) toMap() map[string]string { func (i *Test) waitForRPC() { i.t.Log("Waiting for RPC to be healthy...") - + var err error require.Eventually(i.t, func() bool { - result, err := i.GetRPCLient().GetHealth(context.Background()) - i.t.Logf("getHealth: %+v", result) + var result protocol.GetHealthResponse + result, err = i.GetRPCLient().GetHealth(context.Background()) + i.t.Logf("getHealth: %+v; err: %v", result, err) return err == nil && result.Status == "healthy" }, 30*time.Second, time.Second, - "RPC never got healthy", + fmt.Sprintf("RPC never got healthy: %+v", err), ) } From 01e316c5b19b01e855e04fe59f7535aaaa36cfa9 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 16 Jan 2026 13:48:08 -0500 Subject: [PATCH 41/72] working robust integration tests, not flaky --- .../internal/integrationtest/backfill_test.go | 152 ++++++++---------- .../integrationtest/infrastructure/test.go | 49 +++--- 2 files changed, 90 insertions(+), 111 deletions(-) diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index d6e8f0db..51b1803e 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -9,6 +9,7 @@ import ( "github.com/fsouza/fake-gcs-server/fakestorage" 
"github.com/stretchr/testify/require" + client "github.com/stellar/go-stellar-sdk/clients/rpcclient" "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/network" protocol "github.com/stellar/go-stellar-sdk/protocols/rpc" @@ -22,9 +23,6 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/integrationtest/infrastructure" ) -// Captive core will decide its own close times, so track and set accurate close times for artificial ledgers -var seqToCloseTime = map[uint32]xdr.TimePoint{} //nolint:gochecknoglobals - func TestBackfillEmptyDB(t *testing.T) { // GCS has ledgers from 2-192; history retention window is 128 var localDbStart, localDbEnd uint32 = 0, 0 @@ -50,114 +48,87 @@ func TestBackfillLedgersAtStartOfDB(t *testing.T) { func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint32) { var ( - datastoreStart, datastoreEnd uint32 = 2, 38 - retentionWindow uint32 = 64 // wait for ledger 66, verify [2,64] ingested + datastoreStart, datastoreEnd uint32 = 2, 38 // ledgers present in datastore + retentionWindow uint32 = 64 checkpointFrequency int = 64 - stopLedger int = checkpointFrequency + 2 + stopLedger int = checkpointFrequency + 2 // final ledger to ingest ) - // t.Setenv("ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING", "true") t.Setenv("STELLAR_RPC_INTEGRATION_TESTS_CAPTIVE_CORE_BIN", "/usr/local/bin/stellar-core") - t.Setenv("BACKFILL_TIMEOUT", "2m") - gcsServer, makeDatastoreConfig := makeNewFakeGCSServer(t, datastoreStart, datastoreEnd, retentionWindow, int64(stopLedger/2)) + gcsServer, makeDatastoreConfig := makeNewFakeGCSServer(t, datastoreStart, datastoreEnd, retentionWindow) defer gcsServer.Stop() // Create temporary SQLite DB populated with dummy ledgers - var dbPath string - tmp := t.TempDir() - dbPath = path.Join(tmp, "test.sqlite") - testDB := createDbWithLedgers(t, dbPath, localDbStart, localDbEnd, retentionWindow) - testDB.Close() - if localDbEnd != 0 { - t.Logf("Created local DB, seeded with ledgers %d-%d", localDbStart, localDbEnd) - } else { - t.Logf("Created empty local DB") - } + dbPath := createDbWithLedgers(t, localDbStart, localDbEnd, retentionWindow) - // noUpgrade := "" test := infrastructure.NewTest(t, &infrastructure.TestConfig{ - SQLitePath: dbPath, - DatastoreConfigFunc: makeDatastoreConfig, - NoParallel: true, // can't use parallel due to env vars - DelayDaemonForLedgerN: stopLedger, // stops daemon start until core has at least the datastore ledgers - // ApplyLimits: &noUpgrade, // Check that it ingests all ledgers instead of health - // DontWaitForRPC: true, + SQLitePath: dbPath, + DatastoreConfigFunc: makeDatastoreConfig, + NoParallel: true, // can't use parallel due to env vars + DelayDaemonForLedgerN: int(datastoreEnd), // stops daemon start until core has at least the datastore ledgers + BackfillTimeout: 4 * time.Minute, + IgnoreLedgerCloseTimes: true, // artificially seeded ledgers don't need correct close times relative to core's }) + testDb := test.GetDaemon().GetDB() client := test.GetRPCLient() - // Helper to wait for ledger - waitUntilLedger := func( - cond func(l protocol.GetLatestLedgerResponse) bool, - timeout time.Duration, - cancelIngest bool, - ) protocol.GetLatestLedgerResponse { - var last protocol.GetLatestLedgerResponse - require.Eventually(t, func() bool { - resp, err := client.GetLatestLedger(t.Context()) - require.NoError(t, err) - last = resp - if cancelIngest && cond(resp) { - test.StopCore() - } - return cond(resp) - }, timeout, 100*time.Millisecond, "last ledger 
backfilled: %+v", last.Sequence) - if last.Sequence > 0 { - if _, ok := seqToCloseTime[last.Sequence]; !ok { - seqToCloseTime[last.Sequence] = xdr.TimePoint(last.LedgerCloseTime) - } - } - return last - } - - backfillComplete := waitUntilLedger(func(l protocol.GetLatestLedgerResponse) bool { - return l.Sequence >= datastoreEnd - }, 60*time.Second, false) + backfillComplete := waitUntilLedgerIngested(t, test, client, + func(l protocol.GetLatestLedgerResponse) bool { + return l.Sequence >= datastoreEnd + }, 60*time.Second, false) t.Logf("Successfully backfilled, ledger %d fetched from DB", backfillComplete.Sequence) - coreIngestionComplete := waitUntilLedger(func(l protocol.GetLatestLedgerResponse) bool { - return l.Sequence >= uint32(stopLedger) - }, 60*time.Second, true) + coreIngestionComplete := waitUntilLedgerIngested(t, test, client, + func(l protocol.GetLatestLedgerResponse) bool { + return l.Sequence >= uint32(stopLedger) + }, time.Duration(stopLedger)*time.Second, true) // stop core ingestion once we reach the target t.Logf("Core ingestion complete, ledger %d fetched from captive core", coreIngestionComplete.Sequence) - // Stop ingestion to prevent further ledgers from being ingested - // test.GetDaemon().StopIngestion() - reader := db.NewLedgerReader(testDB) + // We cannot use GetLedgers as it will fall back to the datastore, which is cheating + reader := db.NewLedgerReader(testDb) ledgers, err := reader.GetLedgerSequencesInRange(t.Context(), datastoreStart, uint32(stopLedger)) require.NoError(t, err) len := uint32(len(ledgers)) + require.Equal(t, retentionWindow, len, "expected to have ingested %d ledgers, got %d", retentionWindow, len) require.LessOrEqual(t, ledgers[0], datastoreEnd, "did not ingest ledgers from datastore: "+ fmt.Sprintf("expected first ledger <= %d, got %d", datastoreEnd, ledgers[len-1])) require.Greater(t, ledgers[len-1], datastoreEnd, "did not ingest ledgers from core after backfill: "+ fmt.Sprintf("expected last ledger > %d, got %d", datastoreEnd, ledgers[len-1])) + // Verify they're contiguous + prevSequence := ledgers[0] + for i, sequence := range ledgers[1:] { + require.Equal(t, prevSequence+1, sequence, + "gap detected at position %d: expected %d, got %d", i, prevSequence+1, sequence) + prevSequence = sequence + } t.Logf("Verified ledgers %d-%d present in local DB", ledgers[0], ledgers[len-1]) - // result, err := client.GetLedgers(t.Context(), protocol.GetLedgersRequest{ - // StartLedger: 2, - // Pagination: &protocol.LedgerPaginationOptions{ - // Limit: uint(retentionWindow), - // }, - // }) - - // We cannot use GetLedgers as it will fall back to the datastore, which is cheating - - // require.NoError(t, err) - // require.Len(t, result.Ledgers, int(retentionWindow), - // "expected to get backfilled ledgers from local DB") +} - // // Verify they're contiguous - // for i, ledger := range result.Ledgers { - // expectedSeq := datastoreStart + uint32(i) - // require.Equal(t, expectedSeq, ledger.Sequence, - // "gap detected at position %d: expected %d, got %d", i, expectedSeq, ledger.Sequence) - // } +func waitUntilLedgerIngested(t *testing.T, test *infrastructure.Test, rpcClient *client.Client, + cond func(l protocol.GetLatestLedgerResponse) bool, + timeout time.Duration, + cancelIngest bool, +) protocol.GetLatestLedgerResponse { + var last protocol.GetLatestLedgerResponse + require.Eventually(t, func() bool { + resp, err := rpcClient.GetLatestLedger(t.Context()) + require.NoError(t, err) + last = resp + if cancelIngest && cond(resp) { + // This prevents an 
unlikely race caused by further ingestion by core. Ask me how I know! + test.StopCore() + } + return cond(resp) + }, timeout, 100*time.Millisecond, "last ledger backfilled: %+v", last.Sequence) + return last } func makeNewFakeGCSServer(t *testing.T, datastoreStart, datastoreEnd, retentionWindow uint32, - timeOffset int64, ) (*fakestorage.Server, func(*config.Config)) { opts := fakestorage.Options{ Scheme: "http", @@ -176,7 +147,6 @@ func makeNewFakeGCSServer(t *testing.T, FilesPerPartition: 64000, LedgersPerFile: 1, } - // Configure with backfill enabled and retention window of 128 ledgers makeDatastoreConfig := func(cfg *config.Config) { cfg.ServeLedgersFromDatastore = true @@ -194,7 +164,6 @@ func makeNewFakeGCSServer(t *testing.T, cfg.ClassicFeeStatsLedgerRetentionWindow = retentionWindow cfg.SorobanFeeStatsLedgerRetentionWindow = retentionWindow } - // Add ledger files to datastore for seq := datastoreStart; seq <= datastoreEnd; seq++ { gcsServer.CreateObject(fakestorage.Object{ @@ -202,21 +171,25 @@ func makeNewFakeGCSServer(t *testing.T, BucketName: bucketName, Name: objPrefix + "/" + schema.GetObjectKeyFromSequenceNumber(seq), }, - Content: createLCMBatchBuffer(seq, xdr.TimePoint(time.Now().Unix()+int64(timeOffset))), + Content: createLCMBatchBuffer(seq, xdr.TimePoint(time.Now().Unix())), }) } return gcsServer, makeDatastoreConfig } -func createDbWithLedgers(t *testing.T, dbPath string, start, end, retentionWindow uint32) *db.DB { +func createDbWithLedgers(t *testing.T, start, end, retentionWindow uint32) string { + tmp := t.TempDir() + dbPath := path.Join(tmp, "test.sqlite") testDB, err := db.OpenSQLiteDB(dbPath) require.NoError(t, err) - // defer testDB.Close() + defer func() { + require.NoError(t, testDB.Close()) // will be reopened in NewTest + }() testLogger := supportlog.New() - rw := db.NewReadWriter(testLogger, testDB, interfaces.MakeNoOpDeamon(), int(retentionWindow), retentionWindow, - network.TestNetworkPassphrase) + rw := db.NewReadWriter(testLogger, testDB, interfaces.MakeNoOpDeamon(), + int(retentionWindow), retentionWindow, network.TestNetworkPassphrase) // Insert dummy ledgers into the DB writeTx, err := rw.NewTx(t.Context()) @@ -231,7 +204,12 @@ func createDbWithLedgers(t *testing.T, dbPath string, start, end, retentionWindo } require.NoError(t, writeTx.Commit(lastLedger, nil)) } - return testDB + if end != 0 { + t.Logf("Created local DB, seeded with ledgers %d-%d", start, end) + } else { + t.Logf("Created empty local DB") + } + return dbPath } func createLedger(ledgerSequence uint32) xdr.LedgerCloseMeta { @@ -251,7 +229,7 @@ func createLedger(ledgerSequence uint32) xdr.LedgerCloseMeta { } } -func makeLedgerHeader(ledgerSequence uint32, protocolVersion uint32, closeTime xdr.TimePoint) xdr.LedgerHeader { +func makeLedgerHeader(ledgerSequence, protocolVersion uint32, closeTime xdr.TimePoint) xdr.LedgerHeader { return xdr.LedgerHeader{ LedgerSeq: xdr.Uint32(ledgerSequence), LedgerVersion: xdr.Uint32(protocolVersion), diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index 33b9906c..b7ec3008 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -87,8 +87,9 @@ type TestConfig struct { // empty string to skip upgrading altogether. 
ApplyLimits *string - DontWaitForRPC bool // don't wait for RPC to be healthy - DelayDaemonForLedgerN int // don't start daemon until ledger N reached by core + BackfillTimeout time.Duration + IgnoreLedgerCloseTimes bool // disregard close times when ingesting ledgers + DelayDaemonForLedgerN int // don't start daemon until ledger N reached by core DatastoreConfigFunc func(*config.Config) } @@ -132,12 +133,15 @@ type Test struct { daemon *daemon.Daemon - masterAccount txnbuild.Account - shutdownOnce sync.Once - shutdown func() - onlyRPC bool + masterAccount txnbuild.Account + shutdownOnce sync.Once + shutdown func() + onlyRPC bool + ignoreLedgerCloseTimes bool datastoreConfigFunc func(*config.Config) + + backfillTimeout time.Duration } func NewTest(t testing.TB, cfg *TestConfig) *Test { @@ -160,10 +164,9 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { i.captiveCoreStoragePath = cfg.CaptiveCoreStoragePath parallel = !cfg.NoParallel i.datastoreConfigFunc = cfg.DatastoreConfigFunc + i.backfillTimeout = cfg.BackfillTimeout + i.ignoreLedgerCloseTimes = cfg.IgnoreLedgerCloseTimes - if cfg.DontWaitForRPC { - shouldWaitForRPC = false - } if cfg.OnlyRPC != nil { i.onlyRPC = true i.testPorts.TestCorePorts = cfg.OnlyRPC.CorePorts @@ -217,19 +220,7 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { if !i.runRPCInContainer() { if cfg.DelayDaemonForLedgerN != 0 { i.t.Logf("Delaying daemon start until core reaches ledger %d", cfg.DelayDaemonForLedgerN) - i.waitForLedger(cfg.DelayDaemonForLedgerN) - - // cancelIngest := func() { - // for { - // info, err := i.getCoreInfo() - // if err != nil && i.daemon != nil && info.Info.Ledger.Num >= cfg.DelayDaemonForLedgerN && i.daemon.GetIngestService() != nil { - // i.daemon.StopIngestion() - // break - // } - // time.Sleep(50 * time.Millisecond) - // } - // } - // go cancelIngest() + i.waitForCoreAtLedger(cfg.DelayDaemonForLedgerN) } i.spawnRPCDaemon() } @@ -337,7 +328,7 @@ func (i *Test) waitForCheckpoint() { ) } -func (i *Test) waitForLedger(ledger int) { +func (i *Test) waitForCoreAtLedger(ledger int) { i.t.Logf("Waiting for ledger %d...", ledger) require.Eventually(i.t, func() bool { @@ -384,6 +375,8 @@ func (i *Test) getRPConfigForDaemon() rpcConfig { archiveURL: "http://" + i.testPorts.CoreArchiveHostPort, sqlitePath: i.sqlitePath, captiveCoreHTTPQueryPort: i.testPorts.captiveCoreHTTPQueryPort, + backfillTimeout: i.backfillTimeout, + ignoreLedgerCloseTimes: i.ignoreLedgerCloseTimes, } } @@ -398,13 +391,21 @@ type rpcConfig struct { captiveCoreHTTPPort uint16 archiveURL string sqlitePath string + backfillTimeout time.Duration + ignoreLedgerCloseTimes bool } func (vars rpcConfig) toMap() map[string]string { + maxHealthyLedgerLatency := "10s" + if vars.ignoreLedgerCloseTimes { + // If we're ignoring close times, permit absurdly high latencies + maxHealthyLedgerLatency = time.Duration(1<<63 - 1).String() + } return map[string]string{ "ENDPOINT": vars.endPoint, "ADMIN_ENDPOINT": vars.adminEndpoint, "STELLAR_CORE_URL": vars.stellarCoreURL, + "BACKFILL_TIMEOUT": vars.backfillTimeout.String(), "CORE_REQUEST_TIMEOUT": "2s", "STELLAR_CORE_BINARY_PATH": vars.coreBinaryPath, "CAPTIVE_CORE_CONFIG_PATH": vars.captiveCoreConfigPath, @@ -422,7 +423,7 @@ func (vars rpcConfig) toMap() map[string]string { "INGESTION_TIMEOUT": "10m", "HISTORY_RETENTION_WINDOW": strconv.Itoa(config.OneDayOfLedgers), "CHECKPOINT_FREQUENCY": strconv.Itoa(checkpointFrequency), - "MAX_HEALTHY_LEDGER_LATENCY": "10s", + "MAX_HEALTHY_LEDGER_LATENCY": maxHealthyLedgerLatency, 
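// Editor's note (illustrative, not part of the patch): time.Duration(1<<63 - 1)
// used just above is math.MaxInt64 nanoseconds, the largest value a Duration can
// hold; its String() form is "2562047h47m16.854775807s" (roughly 292 years).
// Passing that through MAX_HEALTHY_LEDGER_LATENCY effectively disables the health
// check's close-time latency gate, which is what tests with artificially seeded
// ledgers need:
//
//	fmt.Println(time.Duration(1<<63 - 1)) // prints 2562047h47m16.854775807s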
"PREFLIGHT_ENABLE_DEBUG": "true", } } From e524b046b41873357d732b9354ee4cdacb7ca4a8 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 16 Jan 2026 18:44:48 -0500 Subject: [PATCH 42/72] fixed race in tests, refactored backfill and changed empty DB behavior --- CHANGELOG.md | 2 +- cmd/stellar-rpc/internal/config/options.go | 14 +- cmd/stellar-rpc/internal/ingest/backfill.go | 121 +++++++++++------- .../internal/ingest/backfill_test.go | 2 +- cmd/stellar-rpc/internal/ingest/service.go | 4 - .../internal/integrationtest/backfill_test.go | 3 + 6 files changed, 93 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47a77e0e..8ac9132f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ go get -u github.com/stellar/go-stellar-sdk/protocols/rpc ``` ### Added -- Added `--backfill` configuration parameter providing synchronous backfilling of `history_archive_window` ledgers to the local DB prior to RPC starting ([#571](https://github.com/stellar/stellar-rpc/pull/571)). +- Added `--backfill` configuration parameter providing synchronous backfilling of `HISTORY_RETENTION_WINDOW` ledgers to the local DB prior to RPC starting ([#571](https://github.com/stellar/stellar-rpc/pull/571)). - Expanded `getLatestLedger` endpoint to also return `closeTime`, `headerXdr`, and `metadataXdr` ([#554](https://github.com/stellar/stellar-rpc/pull/554)). - Added `soroban-env-host` info to `version` command ([#550](https://github.com/stellar/stellar-rpc/pull/550)). - Added `--network` configuration parameter, allowing users to specify a default Stellar network (`testnet`, `pubnet`, or `futurenet`) ([#540](https://github.com/stellar/stellar-rpc/pull/540), [#543](https://github.com/stellar/stellar-rpc/pull/543)). diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index 55e5472a..a2797ce4 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -96,9 +96,17 @@ func (cfg *Config) options() Options { }, }, { - Name: "backfill-timeout", - Usage: "Timeout for backfill database", - ConfigKey: &cfg.BackfillTimeout, + Name: "backfill-timeout", + Usage: "Timeout for backfill database", + ConfigKey: &cfg.BackfillTimeout, + DefaultValue: time.Duration(0), + Validate: func(_ *Option) error { + if cfg.BackfillTimeout == time.Duration(0) { + hours := time.Duration(max(cfg.HistoryRetentionWindow/OneDayOfLedgers, 1)) + cfg.BackfillTimeout = hours * time.Hour + } + return nil + }, }, { Name: "stellar-core-timeout", diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 15881cb0..be241b58 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -29,6 +29,7 @@ type BackfillMeta struct { ingestService *Service dsInfo datastoreInfo dbInfo databaseInfo + bounds backfillBounds } type datastoreInfo struct { @@ -47,6 +48,11 @@ type databaseInfo struct { isEmpty bool } +type backfillBounds struct { + backwards db.LedgerSeqRange + forwards db.LedgerSeqRange +} + // Creates a new BackfillMeta struct func NewBackfillMeta( logger *supportlog.Entry, @@ -93,10 +99,6 @@ func NewBackfillMeta( // It guarantees the backfill of the most recent cfg.HistoryRetentionWindow ledgers // Requires that no sequence number gaps exist in the local DB prior to backfilling func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { - nBackfill := cfg.HistoryRetentionWindow - if cfg.BackfillTimeout == 0 { - 
cfg.BackfillTimeout = time.Duration(nBackfill/config.OneDayOfLedgers) * time.Hour - } ctx, cancelBackfill := context.WithTimeout(context.Background(), cfg.BackfillTimeout) defer cancelBackfill() @@ -112,66 +114,63 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { backfill.logger.Infof("Local DB is empty, skipping precheck") } // Determine bounds for ledgers to be written to local DB in backwards and forwards phases - var ( - currentTipLedger uint32 - lBoundBackwards, rBoundBackwards uint32 // bounds for backwards backfill - lBoundForwards, rBoundForwards uint32 // bounds for forwards backfill - err error - ) - if currentTipLedger, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { + currentTipLedger, err := getLatestSeqInCDP(ctx, backfill.dsInfo.ds) + if err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - backfill.logger.Infof("Current tip ledger in cloud datastore is %d", currentTipLedger) - if currentTipLedger < nBackfill { - backfill.logger.Warnf("Datastore has fewer ledgers (%d) than retention window (%d); "+ - "backfilling all available ledgers", currentTipLedger, nBackfill) - nBackfill = currentTipLedger - } - lBoundBackwards = max(currentTipLedger-nBackfill+1, backfill.dsInfo.minSeq) - if backfill.dbInfo.isEmpty { - rBoundBackwards = currentTipLedger - lBoundForwards = rBoundBackwards + 1 - } else { - if currentTipLedger < backfill.dbInfo.minSeq { - // If we attempt to backfill from lBoundBackwards to currentTipLedger in this case, - // we introduce a gap missing ledgers of sequences (currentTipLedger, backfill.dbInfo.minSeq-1) - return errors.New("datastore stale: current tip is older than local DB minimum ledger") - } - rBoundBackwards = backfill.dbInfo.minSeq - 1 - lBoundForwards = backfill.dbInfo.maxSeq + 1 + bounds, nBackfill, err := backfill.setBounds(currentTipLedger, cfg.HistoryRetentionWindow) + if err != nil { + return errors.Wrap(err, "could not set backfill bounds") } + backfill.logger.Infof("Precheck and initialization passed! Starting backfill backwards phase (phase 2 of 4)") backfill.logger.Infof("Initialization/precheck completed in %s", time.Since(startP1)) startP2 := time.Now() // Phase 2: backfill backwards from minimum written ledger/current tip towards oldest ledger in retention window - if lBoundBackwards < rBoundBackwards { + if bounds.backwards.First < bounds.backwards.Last { backfill.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", - lBoundBackwards, rBoundBackwards) - if err := backfill.runBackfillBackwards(ctx, lBoundBackwards, rBoundBackwards); err != nil { + bounds.backwards.First, bounds.backwards.Last) + if err := backfill.runBackfillBackwards(ctx, bounds.backwards.First, bounds.backwards.Last); err != nil { return errors.Wrap(err, "backfill backwards failed") } - backfill.dbInfo.minSeq = lBoundBackwards + backfill.dbInfo.minSeq = bounds.backwards.First } else { - backfill.logger.Infof("No backwards backfill needed, local DB tail already covers retention window") + backfill.logger.Infof("No backwards backfill needed, local DB empty or DB tail extends past retention window") } backfill.logger.Infof("Backwards backfill completed in %s", time.Since(startP2)) startP3 := time.Now() // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB backfill.logger.Infof("Backward backfill of old ledgers complete! 
Starting forward backfill (phase 3 of 4)") - if rBoundForwards, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { + if bounds.forwards.Last, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - if lBoundForwards < rBoundForwards { - rBoundForwards -= (rBoundForwards % ledgersInCheckpoint) // Align to checkpoint - backfill.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", lBoundForwards, rBoundForwards) - if err = backfill.runBackfillForwards(ctx, lBoundForwards, rBoundForwards); err != nil { + bounds.forwards.Last -= (bounds.forwards.Last % ledgersInCheckpoint) // Align to checkpoint + if bounds.forwards.First < bounds.forwards.Last { + backfill.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", + bounds.forwards.First, bounds.forwards.Last) + if err = backfill.runBackfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { return errors.Wrap(err, "backfill forwards failed") } + if bounds.backwards.Last == 0 { + // Skipped backwards backfill, do one final forwards push to new current tip + bounds.forwards.First = bounds.forwards.Last + 1 + if bounds.forwards.Last, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { + return errors.Wrap(err, "could not get latest ledger number from cloud datastore") + } + bounds.forwards.Last -= (bounds.forwards.Last % ledgersInCheckpoint) // Align to checkpoint + if bounds.forwards.First < bounds.forwards.Last { + backfill.logger.Infof("Backfilling to new current tip, ledgers [%d -> %d]", + bounds.forwards.First, bounds.forwards.Last) + if err = backfill.runBackfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { + return errors.Wrap(err, "second backfill forwards failed") + } + } + } } else { backfill.logger.Infof("No forwards backfill needed, local DB head already at datastore tip") } // Log minimum written sequence after backwards backfill - backfill.dbInfo.maxSeq = max(rBoundForwards, backfill.dbInfo.maxSeq) + backfill.dbInfo.maxSeq = max(bounds.forwards.Last, backfill.dbInfo.maxSeq) backfill.logger.Infof("Forward backfill completed in %s", time.Since(startP3)) startP4 := time.Now() @@ -240,7 +239,11 @@ func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u lChunkBound = lBound } backfill.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := backfill.fillChunk(ctx, backfill.ingestService, tempBackend, lChunkBound, rChunkBound); err != nil { + chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) + if err := tempBackend.PrepareRange(ctx, chunkRange); err != nil { + return err + } + if err := backfill.fillChunk(ctx, backfill.ingestService, tempBackend, chunkRange); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } backfill.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", @@ -262,6 +265,10 @@ func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u func (backfill *BackfillMeta) runBackfillForwards(ctx context.Context, lBound uint32, rBound uint32) error { // Backend for forwards backfill can be persistent over multiple chunks backend, err := makeBackend(backfill.dsInfo) + ledgerRange := ledgerbackend.BoundedRange(lBound, rBound) + if err := backend.PrepareRange(ctx, ledgerRange); err != nil { + return err + } // for testing: prepareRange on entire range, then normal write/commit if err 
!= nil { return errors.Wrap(err, "could not create ledger backend") @@ -279,7 +286,8 @@ func (backfill *BackfillMeta) runBackfillForwards(ctx context.Context, lBound ui rChunkBound := min(rBound, lChunkBound+ChunkSize-1) backfill.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := backfill.fillChunk(ctx, backfill.ingestService, backend, lChunkBound, rChunkBound); err != nil { + chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) + if err := backfill.fillChunk(ctx, backfill.ingestService, backend, chunkRange); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } backfill.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", @@ -294,9 +302,34 @@ func (backfill *BackfillMeta) fillChunk( ctx context.Context, service *Service, readBackend ledgerbackend.LedgerBackend, - left, right uint32, + ledgerRange ledgerbackend.Range, ) error { - return service.ingestRange(ctx, readBackend, ledgerbackend.BoundedRange(left, right)) + return service.ingestRange(ctx, readBackend, ledgerRange) +} + +func (backfill *BackfillMeta) setBounds(currentTip uint32, retentionWindow uint32) (backfillBounds, uint32, error) { + fillBounds := backfillBounds{} + nBackfill := min(retentionWindow, currentTip) + backfill.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", + currentTip, nBackfill) + // fillBounds.backwards.First = max(currentTip-nBackfill+1, backfill.dsInfo.minSeq) // lBoundBackwards + // if empty, skip backwards backfill + if backfill.dbInfo.isEmpty { + // fillBounds.backwards.Last = currentTip // rBoundBackwards = currentTipLedger + // fillBounds.forwards.First = fillBounds.backwards.Last + 1 // lBoundForwards = rBoundBackwards + 1 + fillBounds.forwards.First = max(currentTip-nBackfill+1, backfill.dsInfo.minSeq) + } else { + if currentTip < backfill.dbInfo.minSeq { + // If we attempt to backfill from lBoundBackwards to currentTipLedger in this case, + // we introduce a gap missing ledgers of sequences (currentTipLedger, backfill.dbInfo.minSeq-1) + return backfillBounds{}, 0, + errors.New("datastore stale: current tip is older than local DB minimum ledger") + } + fillBounds.backwards.First = max(currentTip-nBackfill+1, backfill.dsInfo.minSeq) // lBoundBackwards + fillBounds.backwards.Last = backfill.dbInfo.minSeq - 1 + fillBounds.forwards.First = backfill.dbInfo.maxSeq + 1 + } + return fillBounds, nBackfill, nil } // Creates a buffered storage backend for the given datastore diff --git a/cmd/stellar-rpc/internal/ingest/backfill_test.go b/cmd/stellar-rpc/internal/ingest/backfill_test.go index cf148f69..0631fa2d 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill_test.go +++ b/cmd/stellar-rpc/internal/ingest/backfill_test.go @@ -44,7 +44,7 @@ func TestGapDetection(t *testing.T) { require.NoError(t, writeTx.Commit(ledgers[len(ledgers)-1], nil)) backfill := &BackfillMeta{ logger: testLogger, - dbInfo: databaseInfo{rw: rw, reader: db.NewLedgerReader(testDB)}, + dbInfo: databaseInfo{reader: db.NewLedgerReader(testDB)}, } _, _, err = backfill.verifyDbGapless(ctx) require.Error(t, err) diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index d58efa38..16d0973e 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -252,10 +252,6 @@ func (s *Service) ingestRange(ctx context.Context, backend backends.LedgerBacken return err } - if err := 
backend.PrepareRange(ctx, seqRange); err != nil { - return err - } - defer func() { if err := tx.Rollback(); err != nil { s.logger.WithError(err).Warn("could not rollback ingest write transactions") diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index 51b1803e..ae8a942f 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -85,13 +85,16 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint return l.Sequence >= uint32(stopLedger) }, time.Duration(stopLedger)*time.Second, true) // stop core ingestion once we reach the target t.Logf("Core ingestion complete, ledger %d fetched from captive core", coreIngestionComplete.Sequence) + time.Sleep(100 * time.Millisecond) // let final ledgers writes commit to DB before reading + // Verify ledgers present in DB // We cannot use GetLedgers as it will fall back to the datastore, which is cheating reader := db.NewLedgerReader(testDb) ledgers, err := reader.GetLedgerSequencesInRange(t.Context(), datastoreStart, uint32(stopLedger)) require.NoError(t, err) len := uint32(len(ledgers)) require.Equal(t, retentionWindow, len, "expected to have ingested %d ledgers, got %d", retentionWindow, len) + // Ensure at least one ledger from datastore and at least one from core ingestion require.LessOrEqual(t, ledgers[0], datastoreEnd, "did not ingest ledgers from datastore: "+ fmt.Sprintf("expected first ledger <= %d, got %d", datastoreEnd, ledgers[len-1])) require.Greater(t, ledgers[len-1], datastoreEnd, "did not ingest ledgers from core after backfill: "+ From 9da6f4bb8dc81c2ce48089d6e70a321eb32f5b22 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 20 Jan 2026 12:16:22 -0500 Subject: [PATCH 43/72] linter fixes and refactoring --- cmd/stellar-rpc/internal/ingest/backfill.go | 34 +++++++++---------- .../internal/integrationtest/backfill_test.go | 17 +++++----- 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index be241b58..949f50f0 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -29,7 +29,6 @@ type BackfillMeta struct { ingestService *Service dsInfo datastoreInfo dbInfo databaseInfo - bounds backfillBounds } type datastoreInfo struct { @@ -126,8 +125,9 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { backfill.logger.Infof("Precheck and initialization passed! 
Starting backfill backwards phase (phase 2 of 4)") backfill.logger.Infof("Initialization/precheck completed in %s", time.Since(startP1)) startP2 := time.Now() + skipBackwards := bounds.backwards.First >= bounds.backwards.Last // Phase 2: backfill backwards from minimum written ledger/current tip towards oldest ledger in retention window - if bounds.backwards.First < bounds.backwards.Last { + if !skipBackwards { backfill.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", bounds.backwards.First, bounds.backwards.Last) if err := backfill.runBackfillBackwards(ctx, bounds.backwards.First, bounds.backwards.Last); err != nil { @@ -151,24 +151,24 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { if err = backfill.runBackfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { return errors.Wrap(err, "backfill forwards failed") } - if bounds.backwards.Last == 0 { - // Skipped backwards backfill, do one final forwards push to new current tip - bounds.forwards.First = bounds.forwards.Last + 1 - if bounds.forwards.Last, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { - return errors.Wrap(err, "could not get latest ledger number from cloud datastore") - } - bounds.forwards.Last -= (bounds.forwards.Last % ledgersInCheckpoint) // Align to checkpoint - if bounds.forwards.First < bounds.forwards.Last { - backfill.logger.Infof("Backfilling to new current tip, ledgers [%d -> %d]", - bounds.forwards.First, bounds.forwards.Last) - if err = backfill.runBackfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { - return errors.Wrap(err, "second backfill forwards failed") - } - } - } } else { backfill.logger.Infof("No forwards backfill needed, local DB head already at datastore tip") } + if skipBackwards { + // Skipped backwards backfill, do one final forwards push to new current tip + bounds.forwards.First = bounds.forwards.Last + 1 + if bounds.forwards.Last, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { + return errors.Wrap(err, "could not get latest ledger number from cloud datastore") + } + bounds.forwards.Last -= (bounds.forwards.Last % ledgersInCheckpoint) // Align to checkpoint + if bounds.forwards.First < bounds.forwards.Last { + backfill.logger.Infof("Backfilling to new current tip, ledgers [%d -> %d]", + bounds.forwards.First, bounds.forwards.Last) + if err = backfill.runBackfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { + return errors.Wrap(err, "second backfill forwards failed") + } + } + } // Log minimum written sequence after backwards backfill backfill.dbInfo.maxSeq = max(bounds.forwards.Last, backfill.dbInfo.maxSeq) backfill.logger.Infof("Forward backfill completed in %s", time.Since(startP3)) diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index ae8a942f..969059fc 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -49,9 +49,8 @@ func TestBackfillLedgersAtStartOfDB(t *testing.T) { func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint32) { var ( datastoreStart, datastoreEnd uint32 = 2, 38 // ledgers present in datastore - retentionWindow uint32 = 64 - checkpointFrequency int = 64 - stopLedger int = checkpointFrequency + 2 // final ledger to ingest + retentionWindow uint32 = 64 // 8 artificial checkpoints worth of ledgers + stopLedger = 66 // final 
ledger to ingest ) t.Setenv("STELLAR_RPC_INTEGRATION_TESTS_CAPTIVE_CORE_BIN", "/usr/local/bin/stellar-core") @@ -92,13 +91,13 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint reader := db.NewLedgerReader(testDb) ledgers, err := reader.GetLedgerSequencesInRange(t.Context(), datastoreStart, uint32(stopLedger)) require.NoError(t, err) - len := uint32(len(ledgers)) - require.Equal(t, retentionWindow, len, "expected to have ingested %d ledgers, got %d", retentionWindow, len) + count := uint32(len(ledgers)) + require.Equal(t, retentionWindow, count, "expected to have ingested %d ledgers, got %d", retentionWindow, count) // Ensure at least one ledger from datastore and at least one from core ingestion require.LessOrEqual(t, ledgers[0], datastoreEnd, "did not ingest ledgers from datastore: "+ - fmt.Sprintf("expected first ledger <= %d, got %d", datastoreEnd, ledgers[len-1])) - require.Greater(t, ledgers[len-1], datastoreEnd, "did not ingest ledgers from core after backfill: "+ - fmt.Sprintf("expected last ledger > %d, got %d", datastoreEnd, ledgers[len-1])) + fmt.Sprintf("expected first ledger <= %d, got %d", datastoreEnd, ledgers[count-1])) + require.Greater(t, ledgers[count-1], datastoreEnd, "did not ingest ledgers from core after backfill: "+ + fmt.Sprintf("expected last ledger > %d, got %d", datastoreEnd, ledgers[count-1])) // Verify they're contiguous prevSequence := ledgers[0] for i, sequence := range ledgers[1:] { @@ -106,7 +105,7 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint "gap detected at position %d: expected %d, got %d", i, prevSequence+1, sequence) prevSequence = sequence } - t.Logf("Verified ledgers %d-%d present in local DB", ledgers[0], ledgers[len-1]) + t.Logf("Verified ledgers %d-%d present in local DB", ledgers[0], ledgers[count-1]) } func waitUntilLedgerIngested(t *testing.T, test *infrastructure.Test, rpcClient *client.Client, From 7c669e6225ac0d92c8efcfd7c3287e7f3e5f026d Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 20 Jan 2026 13:02:22 -0500 Subject: [PATCH 44/72] refactored to reduce cyclomatic complexity --- cmd/stellar-rpc/internal/ingest/backfill.go | 89 ++++++++++++++------- 1 file changed, 62 insertions(+), 27 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 949f50f0..1e2ad28e 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -101,16 +101,10 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { ctx, cancelBackfill := context.WithTimeout(context.Background(), cfg.BackfillTimeout) defer cancelBackfill() - backfill.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") - ledgersInCheckpoint := cfg.CheckpointFrequency - startP1 := time.Now() // Phase 1: precheck to ensure no pre-existing gaps in local DB - if !backfill.dbInfo.isEmpty { - if _, _, err := backfill.verifyDbGapless(ctx); err != nil { - return errors.Wrap(err, "backfill precheck failed") - } - } else { - backfill.logger.Infof("Local DB is empty, skipping precheck") + startP1 := time.Now() + if err := backfill.runPrecheck(ctx); err != nil { + return err } // Determine bounds for ledgers to be written to local DB in backwards and forwards phases currentTipLedger, err := getLatestSeqInCDP(ctx, backfill.dsInfo.ds) @@ -121,26 +115,65 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { if err != nil { return 
errors.Wrap(err, "could not set backfill bounds") } - - backfill.logger.Infof("Precheck and initialization passed! Starting backfill backwards phase (phase 2 of 4)") backfill.logger.Infof("Initialization/precheck completed in %s", time.Since(startP1)) + startP2 := time.Now() - skipBackwards := bounds.backwards.First >= bounds.backwards.Last // Phase 2: backfill backwards from minimum written ledger/current tip towards oldest ledger in retention window + skipBackwards, err := backfill.runBackfillBackwards(ctx, bounds) + if err != nil { + return err + } + backfill.logger.Infof("Backwards backfill completed in %s", time.Since(startP2)) + + startP3 := time.Now() + // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB + if err = backfill.runBackfillForwards(ctx, &bounds, skipBackwards, cfg.CheckpointFrequency); err != nil { + return err + } + backfill.logger.Infof("Forward backfill completed in %s", time.Since(startP3)) + + // Phase 4: verify no gaps in local DB after backfill + return backfill.runPostcheck(ctx, nBackfill) +} + +func (backfill *BackfillMeta) runPrecheck(ctx context.Context) error { + backfill.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") + if !backfill.dbInfo.isEmpty { + if _, _, err := backfill.verifyDbGapless(ctx); err != nil { + return errors.Wrap(err, "backfill precheck failed") + } + } else { + backfill.logger.Infof("Local DB is empty, skipping precheck") + } + backfill.logger.Infof("Precheck and initialization passed, no gaps detected in local DB") + return nil +} + +func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, bounds backfillBounds) (bool, error) { + backfill.logger.Infof("Starting backfill backwards phase (phase 2 of 4)") + skipBackwards := bounds.backwards.First >= bounds.backwards.Last if !skipBackwards { backfill.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", bounds.backwards.First, bounds.backwards.Last) - if err := backfill.runBackfillBackwards(ctx, bounds.backwards.First, bounds.backwards.Last); err != nil { - return errors.Wrap(err, "backfill backwards failed") + if err := backfill.backfillBackwards(ctx, bounds.backwards.First, bounds.backwards.Last); err != nil { + return false, errors.Wrap(err, "backfill backwards failed") } backfill.dbInfo.minSeq = bounds.backwards.First + backfill.logger.Infof("Backward backfill of old ledgers complete") } else { backfill.logger.Infof("No backwards backfill needed, local DB empty or DB tail extends past retention window") } - backfill.logger.Infof("Backwards backfill completed in %s", time.Since(startP2)) - startP3 := time.Now() - // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB - backfill.logger.Infof("Backward backfill of old ledgers complete! 
Starting forward backfill (phase 3 of 4)") + return skipBackwards, nil +} + +func (backfill *BackfillMeta) runBackfillForwards( + ctx context.Context, + bounds *backfillBounds, + skipBackwards bool, + ledgersInCheckpoint uint32, +) error { + backfill.logger.Infof("Starting forward backfill (phase 3 of 4)") + var err error if bounds.forwards.Last, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } @@ -148,7 +181,7 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { if bounds.forwards.First < bounds.forwards.Last { backfill.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", bounds.forwards.First, bounds.forwards.Last) - if err = backfill.runBackfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { + if err = backfill.backfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { return errors.Wrap(err, "backfill forwards failed") } } else { @@ -162,20 +195,22 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { } bounds.forwards.Last -= (bounds.forwards.Last % ledgersInCheckpoint) // Align to checkpoint if bounds.forwards.First < bounds.forwards.Last { - backfill.logger.Infof("Backfilling to new current tip, ledgers [%d -> %d]", + backfill.logger.Infof("Backfilling to refreshed current tip, ledgers [%d -> %d]", bounds.forwards.First, bounds.forwards.Last) - if err = backfill.runBackfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { + if err = backfill.backfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { return errors.Wrap(err, "second backfill forwards failed") } + backfill.logger.Infof("Second forward backfill to new current tip complete") } } - // Log minimum written sequence after backwards backfill backfill.dbInfo.maxSeq = max(bounds.forwards.Last, backfill.dbInfo.maxSeq) - backfill.logger.Infof("Forward backfill completed in %s", time.Since(startP3)) + backfill.logger.Infof("Forward backfill of recent ledgers complete") + return nil +} +func (backfill *BackfillMeta) runPostcheck(ctx context.Context, nBackfill uint32) error { startP4 := time.Now() - // Phase 4: verify no gaps in local DB after backfill - backfill.logger.Infof("Forward backfill complete, starting post-backfill verification") + backfill.logger.Infof("Starting post-backfill verification") minSeq, maxSeq, err := backfill.verifyDbGapless(ctx) count := maxSeq - minSeq + 1 if err != nil { @@ -219,7 +254,7 @@ func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context) (uint32, uint // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers -func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound uint32, rBound uint32) error { +func (backfill *BackfillMeta) backfillBackwards(ctx context.Context, lBound uint32, rBound uint32) error { for rChunkBound := rBound; rChunkBound >= lBound; { if err := ctx.Err(); err != nil { return err @@ -262,7 +297,7 @@ func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, lBound u // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards the current ledger tip -func (backfill *BackfillMeta) runBackfillForwards(ctx context.Context, lBound uint32, rBound uint32) error { +func (backfill *BackfillMeta) backfillForwards(ctx context.Context, lBound uint32, rBound uint32) 
error { // Backend for forwards backfill can be persistent over multiple chunks backend, err := makeBackend(backfill.dsInfo) ledgerRange := ledgerbackend.BoundedRange(lBound, rBound) From eb7639d1086de04794f5f8bfe3ee543bf90d595d Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 20 Jan 2026 13:21:44 -0500 Subject: [PATCH 45/72] linter duration multiplication fix --- cmd/stellar-rpc/internal/config/options.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index a2797ce4..20fdf65a 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -102,8 +102,8 @@ func (cfg *Config) options() Options { DefaultValue: time.Duration(0), Validate: func(_ *Option) error { if cfg.BackfillTimeout == time.Duration(0) { - hours := time.Duration(max(cfg.HistoryRetentionWindow/OneDayOfLedgers, 1)) - cfg.BackfillTimeout = hours * time.Hour + hours := max(cfg.HistoryRetentionWindow/OneDayOfLedgers, 1) + cfg.BackfillTimeout = time.Duration(hours) * time.Hour } return nil }, From e6addaf69727b277857e915c3019a3bb62c27646 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 20 Jan 2026 13:24:09 -0500 Subject: [PATCH 46/72] added nolint for funcorder linter directive in test.go --- cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index b7ec3008..d37f88ae 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -1,3 +1,4 @@ +//nolint:funcorder // exported and unexported methods interleaved for readability package infrastructure import ( From 069348d43e11647664425f0facc8819f0d5cebe0 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 20 Jan 2026 15:06:30 -0500 Subject: [PATCH 47/72] fixed minor integration test infra bug --- cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index d37f88ae..3b4d095a 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -219,7 +219,7 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { i.waitForCheckpoint() } if !i.runRPCInContainer() { - if cfg.DelayDaemonForLedgerN != 0 { + if cfg != nil && cfg.DelayDaemonForLedgerN != 0 { i.t.Logf("Delaying daemon start until core reaches ledger %d", cfg.DelayDaemonForLedgerN) i.waitForCoreAtLedger(cfg.DelayDaemonForLedgerN) } From c12256e56e383fd8ef10a162f793f581baaaaa34 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 20 Jan 2026 17:27:14 -0500 Subject: [PATCH 48/72] clean/restructured code, decreased excessive logging --- cmd/stellar-rpc/internal/ingest/backfill.go | 208 +++++++++----------- 1 file changed, 90 insertions(+), 118 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 1e2ad28e..69f98ebc 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -48,8 +48,9 @@ type databaseInfo struct { } type backfillBounds struct { - backwards db.LedgerSeqRange - 
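// Editor's note (illustrative numbers, not from the patch): given a datastore tip
// of 1000, a retention window of 300, a datastore minimum of 2, and a local DB
// already holding ledgers [850, 900], setBounds below resolves to
// backwards = [max(1000-300+1, 2), 850-1] = [701, 849] and forwards.First = 901;
// forwards.Last is filled in later from the live tip, aligned down to a checkpoint
// boundary (with a checkpoint frequency of 64: 1000 - 1000%64 = 960). With an
// empty DB only the forwards range is used, starting at 701, and skipBackwards
// triggers a second forwards pass to catch ledgers that closed while the first
// pass ran.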
forwards db.LedgerSeqRange + backwards db.LedgerSeqRange + forwards db.LedgerSeqRange + skipBackwards bool } // Creates a new BackfillMeta struct @@ -101,43 +102,31 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { ctx, cancelBackfill := context.WithTimeout(context.Background(), cfg.BackfillTimeout) defer cancelBackfill() - // Phase 1: precheck to ensure no pre-existing gaps in local DB - startP1 := time.Now() + // Ensure no pre-existing gaps in local DB if err := backfill.runPrecheck(ctx); err != nil { return err } - // Determine bounds for ledgers to be written to local DB in backwards and forwards phases - currentTipLedger, err := getLatestSeqInCDP(ctx, backfill.dsInfo.ds) - if err != nil { - return errors.Wrap(err, "could not get latest ledger number from cloud datastore") - } - bounds, nBackfill, err := backfill.setBounds(currentTipLedger, cfg.HistoryRetentionWindow) + bounds, nBackfill, err := backfill.setBounds(ctx, cfg.HistoryRetentionWindow) if err != nil { return errors.Wrap(err, "could not set backfill bounds") } - backfill.logger.Infof("Initialization/precheck completed in %s", time.Since(startP1)) - startP2 := time.Now() - // Phase 2: backfill backwards from minimum written ledger/current tip towards oldest ledger in retention window - skipBackwards, err := backfill.runBackfillBackwards(ctx, bounds) - if err != nil { - return err + if !bounds.skipBackwards { + if err := backfill.runBackfillBackwards(ctx, bounds); err != nil { + return err + } } - backfill.logger.Infof("Backwards backfill completed in %s", time.Since(startP2)) - startP3 := time.Now() - // Phase 3: backfill forwards from maximum written ledger towards latest ledger to put in DB - if err = backfill.runBackfillForwards(ctx, &bounds, skipBackwards, cfg.CheckpointFrequency); err != nil { + if err := backfill.runBackfillForwards(ctx, &bounds, cfg.CheckpointFrequency); err != nil { return err } - backfill.logger.Infof("Forward backfill completed in %s", time.Since(startP3)) - // Phase 4: verify no gaps in local DB after backfill return backfill.runPostcheck(ctx, nBackfill) } +// Ensures local DB is gapless prior to backfilling func (backfill *BackfillMeta) runPrecheck(ctx context.Context) error { - backfill.logger.Infof("Starting initialization/precheck for backfilling the local database (phase 1 of 4)") + backfill.logger.Infof("Starting initialization/precheck for backfilling the local database") if !backfill.dbInfo.isEmpty { if _, _, err := backfill.verifyDbGapless(ctx); err != nil { return errors.Wrap(err, "backfill precheck failed") @@ -149,58 +138,83 @@ func (backfill *BackfillMeta) runPrecheck(ctx context.Context) error { return nil } -func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, bounds backfillBounds) (bool, error) { - backfill.logger.Infof("Starting backfill backwards phase (phase 2 of 4)") - skipBackwards := bounds.backwards.First >= bounds.backwards.Last - if !skipBackwards { - backfill.logger.Infof("Backfilling to left edge of retention window, ledgers [%d <- %d]", - bounds.backwards.First, bounds.backwards.Last) - if err := backfill.backfillBackwards(ctx, bounds.backwards.First, bounds.backwards.Last); err != nil { - return false, errors.Wrap(err, "backfill backwards failed") - } - backfill.dbInfo.minSeq = bounds.backwards.First - backfill.logger.Infof("Backward backfill of old ledgers complete") +// Sets the bounds for backwards and forwards backfill phases, determines number of ledgers to backfill, and +// whether backwards backfill phase should be 
skipped +func (backfill *BackfillMeta) setBounds( + ctx context.Context, + retentionWindow uint32, +) (backfillBounds, uint32, error) { + // Determine bounds for ledgers to be written to local DB in backwards and forwards phases + currentTipLedger, err := getLatestSeqInCDP(ctx, backfill.dsInfo.ds) + if err != nil { + return backfillBounds{}, 0, + errors.Wrap(err, "could not get latest ledger number from cloud datastore") + } + fillBounds := backfillBounds{} + nBackfill := min(retentionWindow, currentTipLedger) + fillBounds.skipBackwards = false + backfill.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", + currentTipLedger, nBackfill) + // if initial DB empty, skip backwards backfill + if backfill.dbInfo.isEmpty { + fillBounds.forwards.First = max(currentTipLedger-nBackfill+1, backfill.dsInfo.minSeq) + fillBounds.skipBackwards = true } else { - backfill.logger.Infof("No backwards backfill needed, local DB empty or DB tail extends past retention window") + if currentTipLedger < backfill.dbInfo.minSeq { + // If we attempt to backfill from lBoundBackwards to currentTipLedger in this case, + // we introduce a gap missing ledgers of sequences (currentTipLedger, backfill.dbInfo.minSeq-1) + return backfillBounds{}, 0, + errors.New("datastore stale: current tip is older than local DB minimum ledger") + } + fillBounds.backwards.First = max(currentTipLedger-nBackfill+1, backfill.dsInfo.minSeq) + fillBounds.backwards.Last = backfill.dbInfo.minSeq - 1 + fillBounds.forwards.First = backfill.dbInfo.maxSeq + 1 } - return skipBackwards, nil + return fillBounds, nBackfill, nil } +// Backfills the local DB with older ledgers from newest to oldest within the retention window +func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, bounds backfillBounds) error { + backfill.logger.Infof("Backfilling backwards to the left edge of retention window, ledgers [%d <- %d]", + bounds.backwards.First, bounds.backwards.Last) + lBound, rBound := bounds.backwards.First, bounds.backwards.Last + if err := backfill.backfillChunksBackwards(ctx, lBound, rBound); err != nil { + return errors.Wrap(err, "backfill backwards failed") + } + backfill.dbInfo.minSeq = bounds.backwards.First + backfill.logger.Infof("Backward backfill of old ledgers complete") + + return nil +} + +// Backfills the local DB with older ledgers from oldest to newest within the retention window func (backfill *BackfillMeta) runBackfillForwards( ctx context.Context, bounds *backfillBounds, - skipBackwards bool, ledgersInCheckpoint uint32, ) error { - backfill.logger.Infof("Starting forward backfill (phase 3 of 4)") - var err error - if bounds.forwards.Last, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { - return errors.Wrap(err, "could not get latest ledger number from cloud datastore") + numIterations := 1 + // If we skipped backwards backfill, do a second forwards push to a refreshed current tip + if bounds.skipBackwards { + numIterations = 2 } - bounds.forwards.Last -= (bounds.forwards.Last % ledgersInCheckpoint) // Align to checkpoint - if bounds.forwards.First < bounds.forwards.Last { - backfill.logger.Infof("Backfilling to current tip, ledgers [%d -> %d]", - bounds.forwards.First, bounds.forwards.Last) - if err = backfill.backfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { - return errors.Wrap(err, "backfill forwards failed") - } - } else { - backfill.logger.Infof("No forwards backfill needed, local DB head already at datastore tip") - } - if 
skipBackwards { - // Skipped backwards backfill, do one final forwards push to new current tip - bounds.forwards.First = bounds.forwards.Last + 1 + var err error + for i := 0; i < numIterations; i++ { if bounds.forwards.Last, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } bounds.forwards.Last -= (bounds.forwards.Last % ledgersInCheckpoint) // Align to checkpoint if bounds.forwards.First < bounds.forwards.Last { - backfill.logger.Infof("Backfilling to refreshed current tip, ledgers [%d -> %d]", + backfill.logger.Infof("Backfilling forwards to the current datastore tip, ledgers [%d -> %d]", bounds.forwards.First, bounds.forwards.Last) - if err = backfill.backfillForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { - return errors.Wrap(err, "second backfill forwards failed") + if err = backfill.backfillChunksForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { + return errors.Wrap(err, "backfill forwards failed") } - backfill.logger.Infof("Second forward backfill to new current tip complete") + } else { + backfill.logger.Infof("No forwards backfill needed, local DB head already at datastore tip") + } + if bounds.skipBackwards { + bounds.forwards.First = bounds.forwards.Last + 1 } } backfill.dbInfo.maxSeq = max(bounds.forwards.Last, backfill.dbInfo.maxSeq) @@ -208,8 +222,8 @@ func (backfill *BackfillMeta) runBackfillForwards( return nil } +// Verifies backfilled ledgers are gapless and meet retention window requirements func (backfill *BackfillMeta) runPostcheck(ctx context.Context, nBackfill uint32) error { - startP4 := time.Now() backfill.logger.Infof("Starting post-backfill verification") minSeq, maxSeq, err := backfill.verifyDbGapless(ctx) count := maxSeq - minSeq + 1 @@ -218,10 +232,9 @@ func (backfill *BackfillMeta) runPostcheck(ctx context.Context, nBackfill uint32 } if count+ledgerThreshold < nBackfill { return fmt.Errorf("post-backfill verification failed: expected at least %d ledgers, "+ - "got %d ledgers (exceeds acceptable threshold of %d ledgers)", nBackfill, count, ledgerThreshold) + "got %d ledgers (exceeds acceptable threshold of %d missing ledgers)", nBackfill, count, ledgerThreshold) } backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", minSeq, maxSeq) - backfill.logger.Infof("Post-backfill verification completed in %s", time.Since(startP4)) return nil } @@ -236,7 +249,7 @@ func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context) (uint32, uint } // Get sequence number of highest/lowest ledgers in local DB minDbSeq, maxDbSeq := ledgerRange.FirstLedger.Sequence, ledgerRange.LastLedger.Sequence - backfill.logger.Infof("DB verify: checking for gaps in [%d, %d]", + backfill.logger.Debugf("DB verify: checking for gaps in [%d, %d]", minDbSeq, maxDbSeq) expectedCount := maxDbSeq - minDbSeq + 1 sequences, err := backfill.dbInfo.reader.GetLedgerSequencesInRange(ctx, minDbSeq, maxDbSeq) @@ -254,7 +267,7 @@ func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context) (uint32, uint // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers -func (backfill *BackfillMeta) backfillBackwards(ctx context.Context, lBound uint32, rBound uint32) error { +func (backfill *BackfillMeta) backfillChunksBackwards(ctx context.Context, lBound uint32, rBound uint32) error { for rChunkBound := rBound; rChunkBound >= lBound; { if err 
:= ctx.Err(); err != nil { return err @@ -278,7 +291,7 @@ func (backfill *BackfillMeta) backfillBackwards(ctx context.Context, lBound uint if err := tempBackend.PrepareRange(ctx, chunkRange); err != nil { return err } - if err := backfill.fillChunk(ctx, backfill.ingestService, tempBackend, chunkRange); err != nil { + if backfill.ingestService.ingestRange(ctx, tempBackend, chunkRange) != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } backfill.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", @@ -297,14 +310,9 @@ func (backfill *BackfillMeta) backfillBackwards(ctx context.Context, lBound uint // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards the current ledger tip -func (backfill *BackfillMeta) backfillForwards(ctx context.Context, lBound uint32, rBound uint32) error { +func (backfill *BackfillMeta) backfillChunksForwards(ctx context.Context, lBound uint32, rBound uint32) error { // Backend for forwards backfill can be persistent over multiple chunks backend, err := makeBackend(backfill.dsInfo) - ledgerRange := ledgerbackend.BoundedRange(lBound, rBound) - if err := backend.PrepareRange(ctx, ledgerRange); err != nil { - return err - } - // for testing: prepareRange on entire range, then normal write/commit if err != nil { return errors.Wrap(err, "could not create ledger backend") } @@ -314,15 +322,19 @@ func (backfill *BackfillMeta) backfillForwards(ctx context.Context, lBound uint3 } }() + ledgerRange := ledgerbackend.BoundedRange(lBound, rBound) + if err := backend.PrepareRange(ctx, ledgerRange); err != nil { + return err + } for lChunkBound := lBound; lChunkBound <= rBound; lChunkBound += ChunkSize { if err := ctx.Err(); err != nil { return err } - rChunkBound := min(rBound, lChunkBound+ChunkSize-1) - backfill.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) - if err := backfill.fillChunk(ctx, backfill.ingestService, backend, chunkRange); err != nil { + + backfill.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) + if backfill.ingestService.ingestRange(ctx, backend, chunkRange) != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } backfill.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", @@ -331,48 +343,15 @@ func (backfill *BackfillMeta) backfillForwards(ctx context.Context, lBound uint3 return nil } -// Fills a chunk of ledgers [left, right] from the given backend into the local DB -// Fills from left to right (i.e. 
sequence number ascending) -func (backfill *BackfillMeta) fillChunk( - ctx context.Context, - service *Service, - readBackend ledgerbackend.LedgerBackend, - ledgerRange ledgerbackend.Range, -) error { - return service.ingestRange(ctx, readBackend, ledgerRange) -} - -func (backfill *BackfillMeta) setBounds(currentTip uint32, retentionWindow uint32) (backfillBounds, uint32, error) { - fillBounds := backfillBounds{} - nBackfill := min(retentionWindow, currentTip) - backfill.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", - currentTip, nBackfill) - // fillBounds.backwards.First = max(currentTip-nBackfill+1, backfill.dsInfo.minSeq) // lBoundBackwards - // if empty, skip backwards backfill - if backfill.dbInfo.isEmpty { - // fillBounds.backwards.Last = currentTip // rBoundBackwards = currentTipLedger - // fillBounds.forwards.First = fillBounds.backwards.Last + 1 // lBoundForwards = rBoundBackwards + 1 - fillBounds.forwards.First = max(currentTip-nBackfill+1, backfill.dsInfo.minSeq) - } else { - if currentTip < backfill.dbInfo.minSeq { - // If we attempt to backfill from lBoundBackwards to currentTipLedger in this case, - // we introduce a gap missing ledgers of sequences (currentTipLedger, backfill.dbInfo.minSeq-1) - return backfillBounds{}, 0, - errors.New("datastore stale: current tip is older than local DB minimum ledger") - } - fillBounds.backwards.First = max(currentTip-nBackfill+1, backfill.dsInfo.minSeq) // lBoundBackwards - fillBounds.backwards.Last = backfill.dbInfo.minSeq - 1 - fillBounds.forwards.First = backfill.dbInfo.maxSeq + 1 - } - return fillBounds, nBackfill, nil -} - // Creates a buffered storage backend for the given datastore func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { + ledgersPerFile := dsInfo.schema.LedgersPerFile + bufferSize := max(1024/ledgersPerFile, 10) // use fewer files if many ledgers per file + numWorkers := max(bufferSize/10, 5) // approx. 
1 worker per 10 buffered files backend, err := ledgerbackend.NewBufferedStorageBackend( ledgerbackend.BufferedStorageBackendConfig{ - BufferSize: 1024, // buffer is in number of FILES - NumWorkers: 100, + BufferSize: bufferSize, // number of files to buffer + NumWorkers: numWorkers, // number of concurrent GCS fetchers; each shares one buffer of above size RetryLimit: 3, RetryWait: 5 * time.Second, }, @@ -382,13 +361,6 @@ func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { return backend, err } -// Karthik GCS Configuration -// GCSBucketPath = "sdf-ledger-close-meta/v1/ledgers/pubnet" -// GCSBufferSize = 10000 -// GCSNumWorkers = 200 -// GCSRetryLimit = 3 -// GCSRetryWait = 5 * time.Second - // Gets the latest ledger number stored in the cloud Datastore/datalake // Stores it in tip pointer func getLatestSeqInCDP(callerCtx context.Context, ds datastore.DataStore) (uint32, error) { From 604cbf527665dda2ca44e5455ab123e5abf4a56b Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 20 Jan 2026 17:28:12 -0500 Subject: [PATCH 49/72] minor comment update --- cmd/stellar-rpc/internal/ingest/backfill.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 69f98ebc..f81c2621 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -111,12 +111,14 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { return errors.Wrap(err, "could not set backfill bounds") } + // If DB isn't empty, backfill backwards from local DB tail to the left edge of retention window if !bounds.skipBackwards { if err := backfill.runBackfillBackwards(ctx, bounds); err != nil { return err } } + // Backfill from local DB head (or left edge of retention window, if empty) to current tip of datastore if err := backfill.runBackfillForwards(ctx, &bounds, cfg.CheckpointFrequency); err != nil { return err } From 8f5c8b3adba9ad442e25176e192b282923b0902e Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 20 Jan 2026 17:40:20 -0500 Subject: [PATCH 50/72] removed daemon logging, linter fixes --- cmd/stellar-rpc/internal/daemon/daemon.go | 12 ------------ cmd/stellar-rpc/internal/ingest/backfill.go | 6 +++--- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index e3bd3d1b..8719ef79 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -181,14 +181,7 @@ func newCaptiveCore(cfg *config.Config, logger *supportlog.Entry) (*ledgerbacken } func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { - startupStart := time.Now() logger = setupLogger(cfg, logger) - defer func() { - dur := time.Since(startupStart) - logger.WithFields(supportlog.F{ - "duration_ms": dur.Milliseconds(), - }).Info("backfill_done") - }() core := mustCreateCaptiveCore(cfg, logger) historyArchive := mustCreateHistoryArchive(cfg, logger) metricsRegistry := prometheus.NewRegistry() @@ -220,9 +213,6 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { var ingestCfg ingest.Config daemon.ingestService, ingestCfg = createIngestService(cfg, logger, daemon, feewindows, historyArchive, rw) if cfg.Backfill { - backfillStart := time.Now() - timerLog := logger.WithFields(supportlog.F{"backfill_ID": backfillStart}) - timerLog.Info("backfill_start") backfillMeta, err := ingest.NewBackfillMeta( logger, 
daemon.ingestService, @@ -239,8 +229,6 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { // Clear the DB cache and fee windows so they re-populate from the database daemon.db.ResetCache() feewindows.Reset() - logger.Infof("Backfill completed in %s", time.Since(backfillStart)) - timerLog.Info("backfill_done") } // Start ingestion service only after backfill is complete ingest.StartService(daemon.ingestService, ingestCfg) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index f81c2621..1b12eec8 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -201,7 +201,7 @@ func (backfill *BackfillMeta) runBackfillForwards( numIterations = 2 } var err error - for i := 0; i < numIterations; i++ { + for range numIterations { if bounds.forwards.Last, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } @@ -293,7 +293,7 @@ func (backfill *BackfillMeta) backfillChunksBackwards(ctx context.Context, lBoun if err := tempBackend.PrepareRange(ctx, chunkRange); err != nil { return err } - if backfill.ingestService.ingestRange(ctx, tempBackend, chunkRange) != nil { + if err := backfill.ingestService.ingestRange(ctx, tempBackend, chunkRange); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } backfill.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", @@ -336,7 +336,7 @@ func (backfill *BackfillMeta) backfillChunksForwards(ctx context.Context, lBound chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) backfill.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) - if backfill.ingestService.ingestRange(ctx, backend, chunkRange) != nil { + if err := backfill.ingestService.ingestRange(ctx, backend, chunkRange); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } backfill.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", From df347bcd849714a5362ed7a3da0296879ddb055c Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 20 Jan 2026 17:43:14 -0500 Subject: [PATCH 51/72] removed integration test hardcoded captive core binary path --- cmd/stellar-rpc/internal/integrationtest/backfill_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index 969059fc..a1cae931 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -53,8 +53,6 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint stopLedger = 66 // final ledger to ingest ) - t.Setenv("STELLAR_RPC_INTEGRATION_TESTS_CAPTIVE_CORE_BIN", "/usr/local/bin/stellar-core") - gcsServer, makeDatastoreConfig := makeNewFakeGCSServer(t, datastoreStart, datastoreEnd, retentionWindow) defer gcsServer.Stop() From 66e3c5c093bd834ffaedee211e8da8b5a47b7d74 Mon Sep 17 00:00:00 2001 From: Christian Jonas <86204361+cjonas9@users.noreply.github.com> Date: Tue, 20 Jan 2026 17:47:39 -0500 Subject: [PATCH 52/72] delete accidentally committed toml --- rpc-config-backfill-pubnet-testing.toml | 255 ------------------------ 1 file changed, 255 deletions(-) delete mode 100644 rpc-config-backfill-pubnet-testing.toml diff --git 
a/rpc-config-backfill-pubnet-testing.toml b/rpc-config-backfill-pubnet-testing.toml deleted file mode 100644 index 482f6028..00000000 --- a/rpc-config-backfill-pubnet-testing.toml +++ /dev/null @@ -1,255 +0,0 @@ - -# Admin endpoint to listen and serve on. WARNING: this should not be accessible -# from the Internet and does not use TLS. "" (default) disables the admin server -# ADMIN_ENDPOINT = "" - -# path to additional configuration for the Stellar Core configuration file used -# by captive core. It must, at least, include enough details to define a quorum -# set -CAPTIVE_CORE_CONFIG_PATH = "/Users/christian/Desktop/stellar-go-sdk/ingest/ledgerbackend/configs/captive-core-pubnet.cfg" - -# Storage location for Captive Core bucket data -CAPTIVE_CORE_STORAGE_PATH = "/Users/christian/Desktop/stellar-rpc/storage" - -BACKFILL = true - -# establishes how many ledgers exist between checkpoints, do NOT change this -# unless you really know what you are doing -CHECKPOINT_FREQUENCY = 64 - -# configures classic fee stats retention window expressed in number of ledgers -CLASSIC_FEE_STATS_RETENTION_WINDOW = 10 - -# SQLite DB path -DB_PATH = "soroban_rpc_backfill_pubnet_test.sqlite" - -# Default cap on the amount of events included in a single getEvents response -DEFAULT_EVENTS_LIMIT = 100 - -# Default cap on the amount of ledgers included in a single getLedgers response -DEFAULT_LEDGERS_LIMIT = 50 - -# Default cap on the amount of transactions included in a single getTransactions -# response -DEFAULT_TRANSACTIONS_LIMIT = 50 - -# Endpoint to listen and serve on -ENDPOINT = "localhost:8000" - -# The friendbot URL to be returned by getNetwork endpoint -# FRIENDBOT_URL = "" - -# comma-separated list of stellar history archives to connect with -HISTORY_ARCHIVE_URLS = ["http://history.stellar.org/prd/core-live/core_live_001/", "http://history.stellar.org/prd/core-live/core_live_002/", "http://history.stellar.org/prd/core-live/core_live_003/"] - -# configures history retention window for transactions and events, expressed in -# number of ledgers, the default value is 120960 which corresponds to about 7 -# days of history -HISTORY_RETENTION_WINDOW = 120960 - -# Ingestion Timeout when bootstrapping data (checkpoint and in-memory -# initialization) and preparing ledger reads -INGESTION_TIMEOUT = "50m0s" - -# format used for output logs (json or text) -# LOG_FORMAT = "text" - -# minimum log severity (debug, info, warn, error) to log -LOG_LEVEL = "info" - -# Maximum amount of events allowed in a single getEvents response -MAX_EVENTS_LIMIT = 10000 - -# The maximum duration of time allowed for processing a getEvents request. When -# that time elapses, the rpc server would return -32001 and abort the request's -# execution -MAX_GET_EVENTS_EXECUTION_DURATION = "10s" - -# The maximum duration of time allowed for processing a getFeeStats request. -# When that time elapses, the rpc server would return -32001 and abort the -# request's execution -MAX_GET_FEE_STATS_EXECUTION_DURATION = "5s" - -# The maximum duration of time allowed for processing a getHealth request. When -# that time elapses, the rpc server would return -32001 and abort the request's -# execution -MAX_GET_HEALTH_EXECUTION_DURATION = "5s" - -# The maximum duration of time allowed for processing a getLatestLedger request. -# When that time elapses, the rpc server would return -32001 and abort the -# request's execution -MAX_GET_LATEST_LEDGER_EXECUTION_DURATION = "5s" - -# The maximum duration of time allowed for processing a getLedgers request. 
When -# that time elapses, the rpc server would return -32001 and abort the request's -# execution -MAX_GET_LEDGERS_EXECUTION_DURATION = "10s" - -# The maximum duration of time allowed for processing a getLedgerEntries -# request. When that time elapses, the rpc server would return -32001 and abort -# the request's execution -MAX_GET_LEDGER_ENTRIES_EXECUTION_DURATION = "5s" - -# The maximum duration of time allowed for processing a getNetwork request. When -# that time elapses, the rpc server would return -32001 and abort the request's -# execution -MAX_GET_NETWORK_EXECUTION_DURATION = "5s" - -# The maximum duration of time allowed for processing a getTransactions request. -# When that time elapses, the rpc server would return -32001 and abort the -# request's execution -MAX_GET_TRANSACTIONS_EXECUTION_DURATION = "5s" - -# The maximum duration of time allowed for processing a getTransaction request. -# When that time elapses, the rpc server would return -32001 and abort the -# request's execution -MAX_GET_TRANSACTION_EXECUTION_DURATION = "5s" - -# The maximum duration of time allowed for processing a getVersionInfo request. -# When that time elapses, the rpc server would return -32001 and abort the -# request's execution -MAX_GET_VERSION_INFO_EXECUTION_DURATION = "5s" - -# maximum ledger latency (i.e. time elapsed since the last known ledger closing -# time) considered to be healthy (used for the /health endpoint) -MAX_HEALTHY_LEDGER_LATENCY = "30s" - -# Maximum amount of ledgers allowed in a single getLedgers response -MAX_LEDGERS_LIMIT = 200 - -# The max request execution duration is the predefined maximum duration of time -# allowed for processing a request. When that time elapses, the server would -# return 504 and abort the request's execution -MAX_REQUEST_EXECUTION_DURATION = "25s" - -# The maximum duration of time allowed for processing a sendTransaction request. -# When that time elapses, the rpc server would return -32001 and abort the -# request's execution -MAX_SEND_TRANSACTION_EXECUTION_DURATION = "15s" - -# The maximum duration of time allowed for processing a simulateTransaction -# request. When that time elapses, the rpc server would return -32001 and abort -# the request's execution -MAX_SIMULATE_TRANSACTION_EXECUTION_DURATION = "15s" - -# Maximum amount of transactions allowed in a single getTransactions response -MAX_TRANSACTIONS_LIMIT = 200 - -# Network passphrase of the Stellar network transactions should be signed for. -# Commonly used values are "Test SDF Future Network ; October 2022", "Test SDF -# Network ; September 2015" and "Public Global Stellar Network ; September 2015" -NETWORK_PASSPHRASE = "Public Global Stellar Network ; September 2015" - -# NETWORK = "pubnet" - -# Enable debug information in preflighting (provides more detailed errors). It -# should not be enabled in production deployments. -PREFLIGHT_ENABLE_DEBUG = true - -# Number of workers (read goroutines) used to compute preflights for the -# simulateTransaction endpoint. Defaults to the number of CPUs. -PREFLIGHT_WORKER_COUNT = 12 - -# Maximum number of outstanding preflight requests for the simulateTransaction -# endpoint. Defaults to the number of CPUs. 
-PREFLIGHT_WORKER_QUEUE_SIZE = 12 - -# Maximum number of outstanding GetEvents requests -REQUEST_BACKLOG_GET_EVENTS_QUEUE_LIMIT = 1000 - -# Maximum number of outstanding GetFeeStats requests -REQUEST_BACKLOG_GET_FEE_STATS_QUEUE_LIMIT = 100 - -# Maximum number of outstanding GetHealth requests -REQUEST_BACKLOG_GET_HEALTH_QUEUE_LIMIT = 1000 - -# Maximum number of outstanding GetLatestsLedger requests -REQUEST_BACKLOG_GET_LATEST_LEDGER_QUEUE_LIMIT = 1000 - -# Maximum number of outstanding getLedgers requests -REQUEST_BACKLOG_GET_LEDGERS_QUEUE_LIMIT = 1000 - -# Maximum number of outstanding GetLedgerEntries requests -REQUEST_BACKLOG_GET_LEDGER_ENTRIES_QUEUE_LIMIT = 1000 - -# Maximum number of outstanding GetNetwork requests -REQUEST_BACKLOG_GET_NETWORK_QUEUE_LIMIT = 1000 - -# Maximum number of outstanding GetTransactions requests -REQUEST_BACKLOG_GET_TRANSACTIONS_QUEUE_LIMIT = 1000 - -# Maximum number of outstanding GetTransaction requests -REQUEST_BACKLOG_GET_TRANSACTION_QUEUE_LIMIT = 1000 - -# Maximum number of outstanding GetVersionInfo requests -REQUEST_BACKLOG_GET_VERSION_INFO_QUEUE_LIMIT = 1000 - -# Maximum number of outstanding requests -REQUEST_BACKLOG_GLOBAL_QUEUE_LIMIT = 5000 - -# Maximum number of outstanding SendTransaction requests -REQUEST_BACKLOG_SEND_TRANSACTION_QUEUE_LIMIT = 500 - -# Maximum number of outstanding SimulateTransaction requests -REQUEST_BACKLOG_SIMULATE_TRANSACTION_QUEUE_LIMIT = 100 - -# The request execution warning threshold is the predetermined maximum duration -# of time that a request can take to be processed before a warning would be -# generated -REQUEST_EXECUTION_WARNING_THRESHOLD = "5s" - -# Fetch historical ledgers from the datastore if they're not available locally. -SERVE_LEDGERS_FROM_DATASTORE = true - -# configures soroban inclusion fee stats retention window expressed in number of -# ledgers -SOROBAN_FEE_STATS_RETENTION_WINDOW = 50 - -# HTTP port for Captive Core to listen on (0 disables the HTTP server) -STELLAR_CAPTIVE_CORE_HTTP_PORT = 11626 - -# HTTP port for Captive Core to listen on for high-performance queries like -# /getledgerentry (must not conflict with CAPTIVE_CORE_HTTP_PORT) -STELLAR_CAPTIVE_CORE_HTTP_QUERY_PORT = 11628 - -# Size of ledger history in Captive Core's high-performance query server (don't -# touch unless you know what you are doing) -STELLAR_CAPTIVE_CORE_HTTP_QUERY_SNAPSHOT_LEDGERS = 4 - -# Number of threads to use by Captive Core's high-performance query server -STELLAR_CAPTIVE_CORE_HTTP_QUERY_THREAD_POOL_SIZE = 12 - -# path to stellar core binary -STELLAR_CORE_BINARY_PATH = "/usr/local/bin/stellar-core" - -# Timeout used when submitting requests to stellar-core -STELLAR_CORE_TIMEOUT = "2s" - -# URL used to query Stellar Core (local captive core by default) -# STELLAR_CORE_URL = "" - -# Enable strict toml configuration file parsing. This will prevent unknown -# fields in the config toml from being parsed. -# STRICT = false - -# Buffered storage backend configuration for reading ledgers from the datastore. -# [buffered_storage_backend_config] - # buffer_size = 100 - # num_workers = 10 - # retry_limit = 0 - # retry_wait = "0s" - -# External datastore configuration including type, bucket name and schema. 
-[datastore_config] - Compression = "zstd" - NetworkPassphrase = "Public Global Stellar Network ; September 2015" - type = "GCS" - - [datastore_config.params] - destination_bucket_path = "sdf-ledger-close-meta/v1/ledgers/pubnet" - - [datastore_config.schema] - FileExtension = "zst" - files_per_partition = 64000 - ledgers_per_file = 1 - From 815095fb8f9512a3987107252dbac906a88ae5ca Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 22 Jan 2026 01:33:12 -0500 Subject: [PATCH 53/72] optimized backfill, improved names/comments, removed dead code --- cmd/stellar-rpc/internal/config/options.go | 4 +- cmd/stellar-rpc/internal/daemon/daemon.go | 10 +- cmd/stellar-rpc/internal/db/db.go | 2 +- cmd/stellar-rpc/internal/db/ledger.go | 5 +- cmd/stellar-rpc/internal/ingest/backfill.go | 212 ++++++++++---------- cmd/stellar-rpc/internal/ingest/service.go | 5 +- 6 files changed, 117 insertions(+), 121 deletions(-) diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index 20fdf65a..4cdc0da2 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -84,7 +84,7 @@ func (cfg *Config) options() Options { }, { Name: "backfill", - Usage: "backfill database with `n` ledgers synchronously on startup", + Usage: "Populates database with `history-retention-window` ledgers synchronously on startup. This defaults to a week of ledgers if unspecified", ConfigKey: &cfg.Backfill, DefaultValue: false, Validate: func(_ *Option) error { @@ -97,7 +97,7 @@ func (cfg *Config) options() Options { }, { Name: "backfill-timeout", - Usage: "Timeout for backfill database", + Usage: "Timeout for backfill database. If not set, defaults to 1 hour per day of ledgers", ConfigKey: &cfg.BackfillTimeout, DefaultValue: time.Duration(0), Validate: func(_ *Option) error { diff --git a/cmd/stellar-rpc/internal/daemon/daemon.go b/cmd/stellar-rpc/internal/daemon/daemon.go index 8719ef79..1f0a2982 100644 --- a/cmd/stellar-rpc/internal/daemon/daemon.go +++ b/cmd/stellar-rpc/internal/daemon/daemon.go @@ -85,14 +85,6 @@ func (d *Daemon) GetEndpointAddrs() (net.TCPAddr, *net.TCPAddr) { return *addr, adminAddr } -func (d *Daemon) GetIngestService() *ingest.Service { - return d.ingestService -} - -func (d *Daemon) StopIngestion() error { - return d.ingestService.Close() -} - func (d *Daemon) close() { shutdownCtx, shutdownRelease := context.WithTimeout(context.Background(), defaultShutdownGracePeriod) defer shutdownRelease() @@ -231,7 +223,7 @@ func MustNew(cfg *config.Config, logger *supportlog.Entry) *Daemon { feewindows.Reset() } // Start ingestion service only after backfill is complete - ingest.StartService(daemon.ingestService, ingestCfg) + daemon.ingestService.Start(ingestCfg) daemon.preflightWorkerPool = createPreflightWorkerPool(cfg, logger, daemon) daemon.jsonRPCHandler = createJSONRPCHandler(cfg, logger, daemon, feewindows) diff --git a/cmd/stellar-rpc/internal/db/db.go b/cmd/stellar-rpc/internal/db/db.go index 49cd7f7f..f5fe2a7c 100644 --- a/cmd/stellar-rpc/internal/db/db.go +++ b/cmd/stellar-rpc/internal/db/db.go @@ -60,9 +60,9 @@ type DB struct { func (d *DB) ResetCache() { d.cache.Lock() + defer d.cache.Unlock() d.cache.latestLedgerSeq = 0 d.cache.latestLedgerCloseTime = 0 - d.cache.Unlock() } func openSQLiteDB(dbFilePath string) (*db.Session, error) { diff --git a/cmd/stellar-rpc/internal/db/ledger.go b/cmd/stellar-rpc/internal/db/ledger.go index 10432ec0..d4c5ebf4 100644 --- a/cmd/stellar-rpc/internal/db/ledger.go +++ 
b/cmd/stellar-rpc/internal/db/ledger.go @@ -285,10 +285,7 @@ func getLedgerSequencesInRange(ctx context.Context, db readDB, start uint32, end sq.LtOrEq{"sequence": end}, }) var sequences []uint32 - if err := db.Select(ctx, &sequences, sql); err != nil { - return nil, err - } - return sequences, nil + return sequences, db.Select(ctx, &sequences, sql) } type ledgerWriter struct { diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 1b12eec8..6c95a6f9 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -7,6 +7,7 @@ import ( "github.com/pkg/errors" + checkpoint "github.com/stellar/go-stellar-sdk/historyarchive" "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/go-stellar-sdk/support/datastore" supportlog "github.com/stellar/go-stellar-sdk/support/log" @@ -32,25 +33,23 @@ type BackfillMeta struct { } type datastoreInfo struct { - ds datastore.DataStore - schema datastore.DataStoreSchema - minSeq uint32 - // Note maxSeq is excluded because it goes stale every 6 seconds - // it is replaced by `currentTipLedger` in RunBackfill + ds datastore.DataStore + schema datastore.DataStoreSchema + sequences db.LedgerSeqRange // holds the sequence numbers of the oldest and current tip ledgers in the datastore } // This struct holds the local database read/write constructs and metadata initially associated with it type databaseInfo struct { - reader db.LedgerReader - minSeq uint32 - maxSeq uint32 - isEmpty bool + reader db.LedgerReader + sequences db.LedgerSeqRange // holds the sequence numbers of the oldest and newest ledgers in the local database + isNewDb bool } type backfillBounds struct { - backwards db.LedgerSeqRange - forwards db.LedgerSeqRange - skipBackwards bool + backfill db.LedgerSeqRange + frontfill db.LedgerSeqRange + nBackfill uint32 + checkpointAligner checkpoint.CheckpointManager } // Creates a new BackfillMeta struct @@ -79,18 +78,23 @@ func NewBackfillMeta( } return BackfillMeta{ - logger: service.logger.WithField("component", "backfill"), + logger: logger.WithField("subservice", "backfill"), ingestService: service, dbInfo: databaseInfo{ - reader: reader, - minSeq: ledgerRange.FirstLedger.Sequence, - maxSeq: ledgerRange.LastLedger.Sequence, - isEmpty: dbIsEmpty, + reader: reader, + sequences: db.LedgerSeqRange{ + First: ledgerRange.FirstLedger.Sequence, + Last: ledgerRange.LastLedger.Sequence, + }, + isNewDb: dbIsEmpty, }, dsInfo: datastoreInfo{ ds: ds, schema: dsSchema, - minSeq: minWrittenDSLedger, + sequences: db.LedgerSeqRange{ + First: minWrittenDSLedger, + // last is set any time getLatestSeqInCDP is called + }, }, }, nil } @@ -103,34 +107,35 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { defer cancelBackfill() // Ensure no pre-existing gaps in local DB - if err := backfill.runPrecheck(ctx); err != nil { + if err := backfill.runCheckNoGaps(ctx, cfg.IngestionTimeout); err != nil { return err } - bounds, nBackfill, err := backfill.setBounds(ctx, cfg.HistoryRetentionWindow) + bounds, err := backfill.setBounds(ctx, cfg.HistoryRetentionWindow, cfg.CheckpointFrequency) if err != nil { return errors.Wrap(err, "could not set backfill bounds") } - // If DB isn't empty, backfill backwards from local DB tail to the left edge of retention window - if !bounds.skipBackwards { - if err := backfill.runBackfillBackwards(ctx, bounds); err != nil { + // If DB isn't new, fill backwards from local DB tail to the left edge of retention window 
+ if !backfill.dbInfo.isNewDb { + if err := backfill.runBackfill(ctx, bounds); err != nil { return err } } - // Backfill from local DB head (or left edge of retention window, if empty) to current tip of datastore - if err := backfill.runBackfillForwards(ctx, &bounds, cfg.CheckpointFrequency); err != nil { + // Fill forward from local DB head (or left edge of retention window, if empty) to current tip of datastore + if err := backfill.runFrontfill(ctx, &bounds); err != nil { return err } - return backfill.runPostcheck(ctx, nBackfill) + // Ensure no gaps introduced and retention window requirements met + return backfill.runPostcheck(ctx, cfg.IngestionTimeout, bounds.nBackfill) } // Ensures local DB is gapless prior to backfilling -func (backfill *BackfillMeta) runPrecheck(ctx context.Context) error { +func (backfill *BackfillMeta) runCheckNoGaps(ctx context.Context, timeout time.Duration) error { backfill.logger.Infof("Starting initialization/precheck for backfilling the local database") - if !backfill.dbInfo.isEmpty { - if _, _, err := backfill.verifyDbGapless(ctx); err != nil { + if !backfill.dbInfo.isNewDb { + if _, _, err := backfill.verifyDbGapless(ctx, timeout); err != nil { return errors.Wrap(err, "backfill precheck failed") } } else { @@ -140,94 +145,87 @@ func (backfill *BackfillMeta) runPrecheck(ctx context.Context) error { return nil } -// Sets the bounds for backwards and forwards backfill phases, determines number of ledgers to backfill, and -// whether backwards backfill phase should be skipped +// Sets the bounds for backfill and frontfill phases, determines number of ledgers to backfill, and +// whether backfill phase should be skipped func (backfill *BackfillMeta) setBounds( ctx context.Context, retentionWindow uint32, -) (backfillBounds, uint32, error) { - // Determine bounds for ledgers to be written to local DB in backwards and forwards phases - currentTipLedger, err := getLatestSeqInCDP(ctx, backfill.dsInfo.ds) - if err != nil { - return backfillBounds{}, 0, - errors.Wrap(err, "could not get latest ledger number from cloud datastore") + checkpointFrequency uint32, +) (backfillBounds, error) { + // Determine bounds for ledgers to be written to local DB in backfill and frontfill phases + if err := backfill.dsInfo.getLatestSeqInCDP(ctx); err != nil { + return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") + } + currentTipLedger := backfill.dsInfo.sequences.Last + fillBounds := backfillBounds{ + nBackfill: min(retentionWindow, currentTipLedger), + checkpointAligner: checkpoint.NewCheckpointManager(checkpointFrequency), } - fillBounds := backfillBounds{} - nBackfill := min(retentionWindow, currentTipLedger) - fillBounds.skipBackwards = false backfill.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", - currentTipLedger, nBackfill) + currentTipLedger, fillBounds.nBackfill) // if initial DB empty, skip backwards backfill - if backfill.dbInfo.isEmpty { - fillBounds.forwards.First = max(currentTipLedger-nBackfill+1, backfill.dsInfo.minSeq) - fillBounds.skipBackwards = true + if backfill.dbInfo.isNewDb { + fillBounds.frontfill.First = max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) } else { - if currentTipLedger < backfill.dbInfo.minSeq { - // If we attempt to backfill from lBoundBackwards to currentTipLedger in this case, - // we introduce a gap missing ledgers of sequences (currentTipLedger, backfill.dbInfo.minSeq-1) - return backfillBounds{}, 0, - 
errors.New("datastore stale: current tip is older than local DB minimum ledger") + if currentTipLedger < backfill.dbInfo.sequences.First { + // this would introduce a gap missing ledgers of sequences between the current tip and local DB minimum + return backfillBounds{}, errors.New("current datastore tip is older than local DB minimum ledger") } - fillBounds.backwards.First = max(currentTipLedger-nBackfill+1, backfill.dsInfo.minSeq) - fillBounds.backwards.Last = backfill.dbInfo.minSeq - 1 - fillBounds.forwards.First = backfill.dbInfo.maxSeq + 1 + fillBounds.backfill.First = max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) + fillBounds.backfill.Last = backfill.dbInfo.sequences.First - 1 + fillBounds.frontfill.First = backfill.dbInfo.sequences.Last + 1 } - return fillBounds, nBackfill, nil + return fillBounds, nil } // Backfills the local DB with older ledgers from newest to oldest within the retention window -func (backfill *BackfillMeta) runBackfillBackwards(ctx context.Context, bounds backfillBounds) error { - backfill.logger.Infof("Backfilling backwards to the left edge of retention window, ledgers [%d <- %d]", - bounds.backwards.First, bounds.backwards.Last) - lBound, rBound := bounds.backwards.First, bounds.backwards.Last - if err := backfill.backfillChunksBackwards(ctx, lBound, rBound); err != nil { - return errors.Wrap(err, "backfill backwards failed") +func (backfill *BackfillMeta) runBackfill(ctx context.Context, bounds backfillBounds) error { + backfill.logger.Infof("Backfilling to the left edge of retention window, ledgers [%d <- %d]", + bounds.backfill.First, bounds.backfill.Last) + if err := backfill.backfillChunks(ctx, &bounds); err != nil { + return errors.Wrap(err, "backfill failed") } - backfill.dbInfo.minSeq = bounds.backwards.First - backfill.logger.Infof("Backward backfill of old ledgers complete") + backfill.dbInfo.sequences.First = bounds.backfill.First + backfill.logger.Infof("Backfill of old ledgers complete") return nil } // Backfills the local DB with older ledgers from oldest to newest within the retention window -func (backfill *BackfillMeta) runBackfillForwards( - ctx context.Context, - bounds *backfillBounds, - ledgersInCheckpoint uint32, -) error { +func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds *backfillBounds) error { numIterations := 1 - // If we skipped backwards backfill, do a second forwards push to a refreshed current tip - if bounds.skipBackwards { + // If we skipped backfilling, do a second forwards push to a refreshed current tip + if backfill.dbInfo.isNewDb { numIterations = 2 } - var err error for range numIterations { - if bounds.forwards.Last, err = getLatestSeqInCDP(ctx, backfill.dsInfo.ds); err != nil { + if err := backfill.dsInfo.getLatestSeqInCDP(ctx); err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - bounds.forwards.Last -= (bounds.forwards.Last % ledgersInCheckpoint) // Align to checkpoint - if bounds.forwards.First < bounds.forwards.Last { - backfill.logger.Infof("Backfilling forwards to the current datastore tip, ledgers [%d -> %d]", - bounds.forwards.First, bounds.forwards.Last) - if err = backfill.backfillChunksForwards(ctx, bounds.forwards.First, bounds.forwards.Last); err != nil { - return errors.Wrap(err, "backfill forwards failed") + bounds.frontfill.Last = backfill.dsInfo.sequences.Last + bounds.frontfill.Last = bounds.checkpointAligner.PrevCheckpoint(bounds.frontfill.Last) + if bounds.frontfill.First < bounds.frontfill.Last { + 
backfill.logger.Infof("Frontfilling to the current datastore tip, ledgers [%d -> %d]", + bounds.frontfill.First, bounds.frontfill.Last) + if err := backfill.frontfillChunks(ctx, bounds); err != nil { + return errors.Wrap(err, "frontfill failed") } } else { - backfill.logger.Infof("No forwards backfill needed, local DB head already at datastore tip") + backfill.logger.Infof("No frontfill needed, local DB head already at datastore tip") } - if bounds.skipBackwards { - bounds.forwards.First = bounds.forwards.Last + 1 + if backfill.dbInfo.isNewDb { + bounds.frontfill.First = bounds.frontfill.Last + 1 } } - backfill.dbInfo.maxSeq = max(bounds.forwards.Last, backfill.dbInfo.maxSeq) + backfill.dbInfo.sequences.Last = max(bounds.frontfill.Last, backfill.dbInfo.sequences.Last) backfill.logger.Infof("Forward backfill of recent ledgers complete") return nil } // Verifies backfilled ledgers are gapless and meet retention window requirements -func (backfill *BackfillMeta) runPostcheck(ctx context.Context, nBackfill uint32) error { +func (backfill *BackfillMeta) runPostcheck(ctx context.Context, timeout time.Duration, nBackfill uint32) error { backfill.logger.Infof("Starting post-backfill verification") - minSeq, maxSeq, err := backfill.verifyDbGapless(ctx) + minSeq, maxSeq, err := backfill.verifyDbGapless(ctx, timeout) count := maxSeq - minSeq + 1 if err != nil { return errors.Wrap(err, "post-backfill verification failed") @@ -241,9 +239,9 @@ func (backfill *BackfillMeta) runPostcheck(ctx context.Context, nBackfill uint32 } // Checks to ensure state of local DB is acceptable for backfilling -func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context) (uint32, uint32, error) { - ctx, cancelPrecheck := context.WithTimeout(ctx, 4*time.Minute) - defer cancelPrecheck() +func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time.Duration) (uint32, uint32, error) { + ctx, cancelCheckNoGaps := context.WithTimeout(ctx, timeout) + defer cancelCheckNoGaps() ledgerRange, err := backfill.dbInfo.reader.GetLedgerRange(ctx) if err != nil { @@ -268,13 +266,14 @@ func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context) (uint32, uint } // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore -// Used to fill local DB backwards towards older ledgers -func (backfill *BackfillMeta) backfillChunksBackwards(ctx context.Context, lBound uint32, rBound uint32) error { - for rChunkBound := rBound; rChunkBound >= lBound; { +// Used to fill local DB backwards towards older ledgers (starting from newest) +func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds *backfillBounds) error { + lBound, rBound := bounds.backfill.First, bounds.backfill.Last + for i, rChunkBound := 0, rBound; rChunkBound >= lBound; i++ { if err := ctx.Err(); err != nil { return err } - // Create temporary backend for backwards-filling chunks + // Create temporary backend for backward-filling chunks // Note monotonicity constraint of the ledger backend tempBackend, err := makeBackend(backfill.dsInfo) if err != nil { @@ -288,7 +287,7 @@ func (backfill *BackfillMeta) backfillChunksBackwards(ctx context.Context, lBoun } else { lChunkBound = lBound } - backfill.logger.Infof("Backwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) + backfill.logger.Infof("Backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) if err := tempBackend.PrepareRange(ctx, chunkRange); err != nil { return err @@ 
-296,7 +295,7 @@ func (backfill *BackfillMeta) backfillChunksBackwards(ctx context.Context, lBoun if err := backfill.ingestService.ingestRange(ctx, tempBackend, chunkRange); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } - backfill.logger.Infof("Backwards backfill: committed ledgers [%d, %d]; %d%% done", + backfill.logger.Infof("Backfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rBound-lChunkBound)/max(rBound-lBound, 1)) if err := tempBackend.Close(); err != nil { @@ -306,14 +305,23 @@ func (backfill *BackfillMeta) backfillChunksBackwards(ctx context.Context, lBoun break } rChunkBound = lChunkBound - 1 + // Refresh lBound periodically to account for ledgers coming into the datastore + if i > 0 && i%10 == 0 { + if err := backfill.dsInfo.getLatestSeqInCDP(ctx); err != nil { + return err + } + lBound = max(backfill.dsInfo.sequences.Last-bounds.nBackfill+1, backfill.dsInfo.sequences.First) + } } + bounds.backfill.First = lBound return nil } // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore -// Used to fill local DB backwards towards the current ledger tip -func (backfill *BackfillMeta) backfillChunksForwards(ctx context.Context, lBound uint32, rBound uint32) error { - // Backend for forwards backfill can be persistent over multiple chunks +// Used to fill local DB forwards towards the current ledger tip +func (backfill *BackfillMeta) frontfillChunks(ctx context.Context, bounds *backfillBounds) error { + lBound, rBound := bounds.frontfill.First, bounds.frontfill.Last + // Backend for frontfill can be persistent over multiple chunks backend, err := makeBackend(backfill.dsInfo) if err != nil { return errors.Wrap(err, "could not create ledger backend") @@ -335,11 +343,11 @@ func (backfill *BackfillMeta) backfillChunksForwards(ctx context.Context, lBound rChunkBound := min(rBound, lChunkBound+ChunkSize-1) chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) - backfill.logger.Infof("Forwards backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) + backfill.logger.Infof("Frontfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) if err := backfill.ingestService.ingestRange(ctx, backend, chunkRange); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } - backfill.logger.Infof("Forwards backfill: committed ledgers [%d, %d]; %d%% done", + backfill.logger.Infof("Frontfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rChunkBound-lBound)/max(rBound-lBound, 1)) } return nil @@ -363,15 +371,15 @@ func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { return backend, err } -// Gets the latest ledger number stored in the cloud Datastore/datalake -// Stores it in tip pointer -func getLatestSeqInCDP(callerCtx context.Context, ds datastore.DataStore) (uint32, error) { +// Gets the latest ledger number stored in the cloud Datastore/datalake and updates datastoreInfo.sequences.Last +func (dsInfo *datastoreInfo) getLatestSeqInCDP(callerCtx context.Context) error { ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) defer cancelRunBackfill() - seq, err := datastore.FindLatestLedgerSequence(ctx, ds) + var err error + dsInfo.sequences.Last, err = datastore.FindLatestLedgerSequence(ctx, dsInfo.ds) if err != nil { - return 0, errors.Wrap(err, "could not get latest ledger sequence from datastore") + return errors.Wrap(err, "could not get latest ledger sequence 
from datastore") } - return seq, nil + return nil } diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index 16d0973e..b230b596 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -90,7 +90,7 @@ func newService(cfg Config) *Service { return service } -func StartService(service *Service, cfg Config) { +func (service *Service) Start(cfg Config) { ctx, done := context.WithCancel(context.Background()) service.done = done service.wg.Add(1) @@ -241,9 +241,8 @@ func (s *Service) ingest(ctx context.Context, sequence uint32) error { } // Ingests a range of ledgers from a provided ledgerBackend -// Does NOT call ingestLedgerCloseMeta for each ledger as these metrics aren't suitable for backfilling func (s *Service) ingestRange(ctx context.Context, backend backends.LedgerBackend, seqRange backends.Range) error { - s.logger.Infof("Ingesting ledgers [%d, %d]", seqRange.From(), seqRange.To()) + s.logger.Debugf("Ingesting ledgers [%d, %d]", seqRange.From(), seqRange.To()) var ledgerCloseMeta xdr.LedgerCloseMeta startTime := time.Now() From 9914db0c77bba0f4ff0ce5f9885c48da4a9660d5 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 22 Jan 2026 01:36:12 -0500 Subject: [PATCH 54/72] return backend directly --- cmd/stellar-rpc/internal/ingest/backfill.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 6c95a6f9..cf7ac23f 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -358,7 +358,7 @@ func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { ledgersPerFile := dsInfo.schema.LedgersPerFile bufferSize := max(1024/ledgersPerFile, 10) // use fewer files if many ledgers per file numWorkers := max(bufferSize/10, 5) // approx. 
1 worker per 10 buffered files - backend, err := ledgerbackend.NewBufferedStorageBackend( + return ledgerbackend.NewBufferedStorageBackend( ledgerbackend.BufferedStorageBackendConfig{ BufferSize: bufferSize, // number of files to buffer NumWorkers: numWorkers, // number of concurrent GCS fetchers; each shares one buffer of above size @@ -368,7 +368,6 @@ func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { dsInfo.ds, dsInfo.schema, ) - return backend, err } // Gets the latest ledger number stored in the cloud Datastore/datalake and updates datastoreInfo.sequences.Last From 0499d2dad4a66f1e0ef69092d43aac7994a43a1c Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 22 Jan 2026 14:02:45 -0500 Subject: [PATCH 55/72] reduced memory footprint of gapless check, minor test patches --- cmd/stellar-rpc/internal/db/ledger.go | 28 ++++++++++++++----- cmd/stellar-rpc/internal/db/mocks.go | 4 +-- cmd/stellar-rpc/internal/ingest/backfill.go | 8 ++---- .../internal/ingest/backfill_test.go | 5 ++-- .../internal/ingest/service_test.go | 2 +- .../internal/integrationtest/backfill_test.go | 22 ++++++--------- .../methods/get_latest_ledger_test.go | 7 +++-- cmd/stellar-rpc/internal/methods/mocks.go | 4 +-- 8 files changed, 46 insertions(+), 34 deletions(-) diff --git a/cmd/stellar-rpc/internal/db/ledger.go b/cmd/stellar-rpc/internal/db/ledger.go index d4c5ebf4..b8fb5ee5 100644 --- a/cmd/stellar-rpc/internal/db/ledger.go +++ b/cmd/stellar-rpc/internal/db/ledger.go @@ -25,7 +25,7 @@ type LedgerReader interface { GetLedger(ctx context.Context, sequence uint32) (xdr.LedgerCloseMeta, bool, error) StreamAllLedgers(ctx context.Context, f StreamLedgerFn) error GetLedgerRange(ctx context.Context) (ledgerbucketwindow.LedgerRange, error) - GetLedgerSequencesInRange(ctx context.Context, start uint32, end uint32) ([]uint32, error) + GetLedgerCountInRange(ctx context.Context, start uint32, end uint32) (uint32, uint32, uint32, error) StreamLedgerRange(ctx context.Context, startLedger uint32, endLedger uint32, f StreamLedgerFn) error NewTx(ctx context.Context) (LedgerReaderTx, error) GetLatestLedgerSequence(ctx context.Context) (uint32, error) @@ -209,8 +209,8 @@ func (r ledgerReader) GetLedgerRange(ctx context.Context) (ledgerbucketwindow.Le return getLedgerRangeWithoutCache(ctx, r.db) } -func (r ledgerReader) GetLedgerSequencesInRange(ctx context.Context, start uint32, end uint32) ([]uint32, error) { - return getLedgerSequencesInRange(ctx, r.db, start, end) +func (r ledgerReader) GetLedgerCountInRange(ctx context.Context, start, end uint32) (uint32, uint32, uint32, error) { + return getLedgerCountInRange(ctx, r.db, start, end) } func (r ledgerReader) GetLatestLedgerSequence(ctx context.Context) (uint32, error) { @@ -278,14 +278,28 @@ func getLedgerRangeWithoutCache(ctx context.Context, db readDB) (ledgerbucketwin }, nil } -func getLedgerSequencesInRange(ctx context.Context, db readDB, start uint32, end uint32) ([]uint32, error) { - sql := sq.Select("sequence").From(ledgerCloseMetaTableName). +// Queries a local DB, and in the inclusive range [start, end], returns the count of ledgers, and min/max sequence nums +// Assumes all sequence numbers in the DB are unique +func getLedgerCountInRange(ctx context.Context, db readDB, start, end uint32) (uint32, uint32, uint32, error) { + sql := sq.Select("COUNT(*) as count", "MIN(sequence) as min_seq", "MAX(sequence) as max_seq"). + From(ledgerCloseMetaTableName). 
Where(sq.And{ sq.GtOrEq{"sequence": start}, sq.LtOrEq{"sequence": end}, }) - var sequences []uint32 - return sequences, db.Select(ctx, &sequences, sql) + + var results []struct { + Count int64 `db:"count"` + MinSeq int64 `db:"min_seq"` + MaxSeq int64 `db:"max_seq"` + } + if err := db.Select(ctx, &results, sql); err != nil { + return 0, 0, 0, err + } + if len(results) == 0 { + return 0, 0, 0, nil + } + return uint32(results[0].Count), uint32(results[0].MinSeq), uint32(results[0].MaxSeq), nil } type ledgerWriter struct { diff --git a/cmd/stellar-rpc/internal/db/mocks.go b/cmd/stellar-rpc/internal/db/mocks.go index 927280f6..65a21093 100644 --- a/cmd/stellar-rpc/internal/db/mocks.go +++ b/cmd/stellar-rpc/internal/db/mocks.go @@ -117,8 +117,8 @@ func (m *MockLedgerReader) NewTx(_ context.Context) (LedgerReaderTx, error) { return nil, errors.New("mock NewTx error") } -func (m *MockLedgerReader) GetLedgerSequencesInRange(_ context.Context, _, _ uint32) ([]uint32, error) { - return nil, nil +func (m *MockLedgerReader) GetLedgerCountInRange(_ context.Context, _, _ uint32) (uint32, uint32, uint32, error) { + return 0, 0, 0, nil } var ( diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index cf7ac23f..22ab600f 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -252,15 +252,13 @@ func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time. backfill.logger.Debugf("DB verify: checking for gaps in [%d, %d]", minDbSeq, maxDbSeq) expectedCount := maxDbSeq - minDbSeq + 1 - sequences, err := backfill.dbInfo.reader.GetLedgerSequencesInRange(ctx, minDbSeq, maxDbSeq) + count, sequencesMin, sequencesMax, err := backfill.dbInfo.reader.GetLedgerCountInRange(ctx, minDbSeq, maxDbSeq) if err != nil { return 0, 0, errors.Wrap(err, "db verify: could not get ledger sequences in local DB") } - sequencesMin, sequencesMax := sequences[0], sequences[len(sequences)-1] - - if len(sequences) != int(expectedCount) { + if count != expectedCount { return 0, 0, fmt.Errorf("db verify: gap detected in local DB: expected %d ledgers, got %d ledgers", - expectedCount, len(sequences)) + expectedCount, count) } return sequencesMin, sequencesMax, nil } diff --git a/cmd/stellar-rpc/internal/ingest/backfill_test.go b/cmd/stellar-rpc/internal/ingest/backfill_test.go index 0631fa2d..c238b40a 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill_test.go +++ b/cmd/stellar-rpc/internal/ingest/backfill_test.go @@ -3,6 +3,7 @@ package ingest import ( "path" "testing" + "time" "github.com/stretchr/testify/require" @@ -46,7 +47,7 @@ func TestGapDetection(t *testing.T) { logger: testLogger, dbInfo: databaseInfo{reader: db.NewLedgerReader(testDB)}, } - _, _, err = backfill.verifyDbGapless(ctx) + _, _, err = backfill.verifyDbGapless(ctx, 5*time.Second) require.Error(t, err) require.ErrorContains(t, err, "gap detected in local DB") @@ -56,7 +57,7 @@ func TestGapDetection(t *testing.T) { require.NoError(t, writeTx.LedgerWriter().InsertLedger(createLedger(103))) require.NoError(t, writeTx.Commit(ledgers[len(ledgers)-1], nil)) - _, _, err = backfill.verifyDbGapless(ctx) + _, _, err = backfill.verifyDbGapless(ctx, 5*time.Second) require.NoError(t, err) } diff --git a/cmd/stellar-rpc/internal/ingest/service_test.go b/cmd/stellar-rpc/internal/ingest/service_test.go index 68ca329e..ed90eca3 100644 --- a/cmd/stellar-rpc/internal/ingest/service_test.go +++ b/cmd/stellar-rpc/internal/ingest/service_test.go @@ -54,7 +54,7 @@ func 
TestRetryRunningIngestion(t *testing.T) { Daemon: interfaces.MakeNoOpDeamon(), } service := NewService(config) - StartService(service, config) + service.Start(config) retryWg.Wait() service.Close() assert.Equal(t, 1, numRetries) diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index a1cae931..c08238e1 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -87,23 +87,19 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint // Verify ledgers present in DB // We cannot use GetLedgers as it will fall back to the datastore, which is cheating reader := db.NewLedgerReader(testDb) - ledgers, err := reader.GetLedgerSequencesInRange(t.Context(), datastoreStart, uint32(stopLedger)) + count, minSeq, maxSeq, err := reader.GetLedgerCountInRange(t.Context(), datastoreStart, uint32(stopLedger)) require.NoError(t, err) - count := uint32(len(ledgers)) require.Equal(t, retentionWindow, count, "expected to have ingested %d ledgers, got %d", retentionWindow, count) // Ensure at least one ledger from datastore and at least one from core ingestion - require.LessOrEqual(t, ledgers[0], datastoreEnd, "did not ingest ledgers from datastore: "+ - fmt.Sprintf("expected first ledger <= %d, got %d", datastoreEnd, ledgers[count-1])) - require.Greater(t, ledgers[count-1], datastoreEnd, "did not ingest ledgers from core after backfill: "+ - fmt.Sprintf("expected last ledger > %d, got %d", datastoreEnd, ledgers[count-1])) + require.LessOrEqual(t, minSeq, datastoreEnd, "did not ingest ledgers from datastore: "+ + fmt.Sprintf("expected first ledger <= %d, got %d", datastoreEnd, minSeq)) + require.Greater(t, maxSeq, datastoreEnd, "did not ingest ledgers from core after backfill: "+ + fmt.Sprintf("expected last ledger > %d, got %d", datastoreEnd, maxSeq)) // Verify they're contiguous - prevSequence := ledgers[0] - for i, sequence := range ledgers[1:] { - require.Equal(t, prevSequence+1, sequence, - "gap detected at position %d: expected %d, got %d", i, prevSequence+1, sequence) - prevSequence = sequence - } - t.Logf("Verified ledgers %d-%d present in local DB", ledgers[0], ledgers[count-1]) + require.Equal(t, maxSeq-minSeq+1, count, + "gap detected: expected %d ledgers in [%d, %d], got %d", maxSeq-minSeq+1, minSeq, maxSeq, count) + + t.Logf("Verified ledgers %d-%d present in local DB", minSeq, maxSeq) } func waitUntilLedgerIngested(t *testing.T, test *infrastructure.Test, rpcClient *client.Client, diff --git a/cmd/stellar-rpc/internal/methods/get_latest_ledger_test.go b/cmd/stellar-rpc/internal/methods/get_latest_ledger_test.go index a7f6d6c2..0e89dca6 100644 --- a/cmd/stellar-rpc/internal/methods/get_latest_ledger_test.go +++ b/cmd/stellar-rpc/internal/methods/get_latest_ledger_test.go @@ -33,8 +33,11 @@ func (ledgerReader *ConstantLedgerReader) GetLedgerRange(_ context.Context) (led return ledgerbucketwindow.LedgerRange{}, nil } -func (ledgerReader *ConstantLedgerReader) GetLedgerSequencesInRange(_ context.Context, _, _ uint32) ([]uint32, error) { - return nil, nil +func (ledgerReader *ConstantLedgerReader) GetLedgerCountInRange( + _ context.Context, + _, _ uint32, +) (uint32, uint32, uint32, error) { + return 0, 0, 0, nil } func (ledgerReader *ConstantLedgerReader) NewTx(_ context.Context) (db.LedgerReaderTx, error) { diff --git a/cmd/stellar-rpc/internal/methods/mocks.go b/cmd/stellar-rpc/internal/methods/mocks.go index 
ae17cb72..25f83d44 100644 --- a/cmd/stellar-rpc/internal/methods/mocks.go +++ b/cmd/stellar-rpc/internal/methods/mocks.go @@ -38,9 +38,9 @@ func (m *MockLedgerReader) GetLedgerRange(ctx context.Context) (ledgerbucketwind return args.Get(0).(ledgerbucketwindow.LedgerRange), args.Error(1) //nolint:forcetypeassert } -func (m *MockLedgerReader) GetLedgerSequencesInRange(ctx context.Context, start, end uint32) ([]uint32, error) { +func (m *MockLedgerReader) GetLedgerCountInRange(ctx context.Context, start, end uint32) (uint32, uint32, uint32, error) { args := m.Called(ctx, start, end) - return args.Get(0).([]uint32), args.Error(1) //nolint:forcetypeassert + return args.Get(0).(uint32), args.Get(1).(uint32), args.Get(2).(uint32), args.Error(3) //nolint:forcetypeassert } func (m *MockLedgerReader) StreamLedgerRange(ctx context.Context, startLedger, endLedger uint32, From 729867e2575162dddee0f772ee7c2c53dee9a396 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 22 Jan 2026 14:22:31 -0500 Subject: [PATCH 56/72] linter: fixed casting danger, lll --- cmd/stellar-rpc/internal/db/ledger.go | 9 ++++++++- cmd/stellar-rpc/internal/db/mocks.go | 5 ++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/cmd/stellar-rpc/internal/db/ledger.go b/cmd/stellar-rpc/internal/db/ledger.go index b8fb5ee5..0097352d 100644 --- a/cmd/stellar-rpc/internal/db/ledger.go +++ b/cmd/stellar-rpc/internal/db/ledger.go @@ -6,6 +6,7 @@ import ( "database/sql" "errors" "fmt" + "math" sq "github.com/Masterminds/squirrel" @@ -296,9 +297,15 @@ func getLedgerCountInRange(ctx context.Context, db readDB, start, end uint32) (u if err := db.Select(ctx, &results, sql); err != nil { return 0, 0, 0, err } - if len(results) == 0 { + if len(results) == 0 || results[0].Count == 0 { return 0, 0, 0, nil } + // ensure casting to uint32 is safe + if results[0].Count < 0 || results[0].Count > math.MaxUint32 || + results[0].MinSeq < 0 || results[0].MinSeq > math.MaxUint32 || + results[0].MaxSeq < 0 || results[0].MaxSeq > math.MaxUint32 { + return 0, 0, 0, errors.New("ledger count query returned out-of-range values") + } return uint32(results[0].Count), uint32(results[0].MinSeq), uint32(results[0].MaxSeq), nil } diff --git a/cmd/stellar-rpc/internal/db/mocks.go b/cmd/stellar-rpc/internal/db/mocks.go index 65a21093..513083de 100644 --- a/cmd/stellar-rpc/internal/db/mocks.go +++ b/cmd/stellar-rpc/internal/db/mocks.go @@ -117,7 +117,10 @@ func (m *MockLedgerReader) NewTx(_ context.Context) (LedgerReaderTx, error) { return nil, errors.New("mock NewTx error") } -func (m *MockLedgerReader) GetLedgerCountInRange(_ context.Context, _, _ uint32) (uint32, uint32, uint32, error) { +func (m *MockLedgerReader) GetLedgerCountInRange(_ context.Context, + _ uint32, + _ uint32, +) (uint32, uint32, uint32, error) { return 0, 0, 0, nil } From f0169ed34f91867285bce153ea995e5257c8bd90 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Thu, 22 Jan 2026 15:25:57 -0500 Subject: [PATCH 57/72] getLedgerCountInRange minor improvement, service naming in Start() linter fix --- cmd/stellar-rpc/internal/db/ledger.go | 16 +++++----------- cmd/stellar-rpc/internal/ingest/service.go | 12 ++++++------ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/cmd/stellar-rpc/internal/db/ledger.go b/cmd/stellar-rpc/internal/db/ledger.go index 0097352d..be909920 100644 --- a/cmd/stellar-rpc/internal/db/ledger.go +++ b/cmd/stellar-rpc/internal/db/ledger.go @@ -6,7 +6,6 @@ import ( "database/sql" "errors" "fmt" - "math" sq "github.com/Masterminds/squirrel" @@ 
-290,9 +289,9 @@ func getLedgerCountInRange(ctx context.Context, db readDB, start, end uint32) (u }) var results []struct { - Count int64 `db:"count"` - MinSeq int64 `db:"min_seq"` - MaxSeq int64 `db:"max_seq"` + Count uint32 `db:"count"` + MinSeq uint32 `db:"min_seq"` + MaxSeq uint32 `db:"max_seq"` } if err := db.Select(ctx, &results, sql); err != nil { return 0, 0, 0, err @@ -300,13 +299,8 @@ func getLedgerCountInRange(ctx context.Context, db readDB, start, end uint32) (u if len(results) == 0 || results[0].Count == 0 { return 0, 0, 0, nil } - // ensure casting to uint32 is safe - if results[0].Count < 0 || results[0].Count > math.MaxUint32 || - results[0].MinSeq < 0 || results[0].MinSeq > math.MaxUint32 || - results[0].MaxSeq < 0 || results[0].MaxSeq > math.MaxUint32 { - return 0, 0, 0, errors.New("ledger count query returned out-of-range values") - } - return uint32(results[0].Count), uint32(results[0].MinSeq), uint32(results[0].MaxSeq), nil + + return results[0].Count, results[0].MinSeq, results[0].MaxSeq, nil } type ledgerWriter struct { diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index b230b596..4072c2de 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -90,20 +90,20 @@ func newService(cfg Config) *Service { return service } -func (service *Service) Start(cfg Config) { +func (s *Service) Start(cfg Config) { ctx, done := context.WithCancel(context.Background()) - service.done = done - service.wg.Add(1) + s.done = done + s.wg.Add(1) panicGroup := util.UnrecoverablePanicGroup.Log(cfg.Logger) panicGroup.Go(func() { - defer service.wg.Done() + defer s.wg.Done() // Retry running ingestion every second for 5 seconds. constantBackoff := backoff.WithMaxRetries(backoff.NewConstantBackOff(1*time.Second), maxRetries) // Don't want to keep retrying if the context gets canceled. 
contextBackoff := backoff.WithContext(constantBackoff, ctx) err := backoff.RetryNotify( func() error { - err := service.run(ctx, cfg.Archive) + err := s.run(ctx, cfg.Archive) if errors.Is(err, errEmptyArchives) { // keep retrying until history archives are published constantBackoff.Reset() @@ -113,7 +113,7 @@ func (service *Service) Start(cfg Config) { contextBackoff, cfg.OnIngestionRetry) if err != nil && !errors.Is(err, context.Canceled) { - service.logger.WithError(err).Fatal("could not run ingestion") + s.logger.WithError(err).Fatal("could not run ingestion") } }) } From e85a0913c6cbac7b7ae32fdf9277b5e19e6f66db Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 23 Jan 2026 11:29:38 -0500 Subject: [PATCH 58/72] linter lll --- cmd/stellar-rpc/internal/methods/mocks.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmd/stellar-rpc/internal/methods/mocks.go b/cmd/stellar-rpc/internal/methods/mocks.go index 25f83d44..fc1b4fdf 100644 --- a/cmd/stellar-rpc/internal/methods/mocks.go +++ b/cmd/stellar-rpc/internal/methods/mocks.go @@ -38,7 +38,11 @@ func (m *MockLedgerReader) GetLedgerRange(ctx context.Context) (ledgerbucketwind return args.Get(0).(ledgerbucketwindow.LedgerRange), args.Error(1) //nolint:forcetypeassert } -func (m *MockLedgerReader) GetLedgerCountInRange(ctx context.Context, start, end uint32) (uint32, uint32, uint32, error) { +func (m *MockLedgerReader) GetLedgerCountInRange( + ctx context.Context, + start uint32, + end uint32, +) (uint32, uint32, uint32, error) { args := m.Called(ctx, start, end) return args.Get(0).(uint32), args.Get(1).(uint32), args.Get(2).(uint32), args.Error(3) //nolint:forcetypeassert } From 43e38eeaa5aa3a4f741cdfa45cc97631a6397b51 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 23 Jan 2026 16:26:21 -0500 Subject: [PATCH 59/72] optimized InsertEvents --- cmd/stellar-rpc/internal/db/event.go | 51 +++++++++++++--------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/cmd/stellar-rpc/internal/db/event.go b/cmd/stellar-rpc/internal/db/event.go index feb287d6..9a61c665 100644 --- a/cmd/stellar-rpc/internal/db/event.go +++ b/cmd/stellar-rpc/internal/db/event.go @@ -104,6 +104,8 @@ func (eventHandler *eventHandler) InsertEvents(lcm xdr.LedgerCloseMeta) error { // where -1 is actually the largest possible uint32. // var beforeIndex, afterIndex uint32 + // Accumulate all ledger events to insert + var allLedgerEvents []dbEvent for { var tx ingest.LedgerTransaction @@ -125,7 +127,6 @@ func (eventHandler *eventHandler) InsertEvents(lcm xdr.LedgerCloseMeta) error { opEvents := allEvents.OperationEvents txEvents := allEvents.TransactionEvents - insertableEvents := make([]dbEvent, 0, len(txEvents)+len(opEvents)) var afterTxIndex uint32 @@ -173,13 +174,13 @@ func (eventHandler *eventHandler) InsertEvents(lcm xdr.LedgerCloseMeta) error { return err } - insertableEvents = append(insertableEvents, insertedEvent) + allLedgerEvents = append(allLedgerEvents, insertedEvent) } // Then, gather all of the operation events. for opIndex, innerOpEvents := range opEvents { for eventIndex, event := range innerOpEvents { - insertableEvents = append(insertableEvents, dbEvent{ + allLedgerEvents = append(allLedgerEvents, dbEvent{ TxHash: tx.Hash, Event: xdr.DiagnosticEvent{ InSuccessfulContractCall: tx.Successful(), @@ -194,35 +195,31 @@ func (eventHandler *eventHandler) InsertEvents(lcm xdr.LedgerCloseMeta) error { }) } } + } - query := sq.Insert(eventTableName). 
- Columns( - "id", - "contract_id", - "event_type", - "event_data", - "ledger_close_time", - "transaction_hash", - "topic1", "topic2", "topic3", "topic4", - ) + if len(allLedgerEvents) == 0 { + return nil + } - for _, event := range insertableEvents { - query, err = insertEvents(query, lcm, event) - if err != nil { - return err - } - } + query := sq.Insert(eventTableName). + Columns( + "id", + "contract_id", + "event_type", + "event_data", + "ledger_close_time", + "transaction_hash", + "topic1", "topic2", "topic3", "topic4", + ) - if len(insertableEvents) > 0 { // don't run empty insert - // Ignore the last inserted ID as it is not needed - _, err = query.RunWith(eventHandler.stmtCache).Exec() - if err != nil { - return err - } + for _, event := range allLedgerEvents { + query, err = insertEvents(query, lcm, event) + if err != nil { + return err } } - - return nil + _, err = query.RunWith(eventHandler.stmtCache).Exec() + return err } func insertEvents( From b106492400ccfd7720feb97163cae5576193d26c Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Fri, 23 Jan 2026 17:55:32 -0500 Subject: [PATCH 60/72] bounds setting improvements, minor cosmetic changes --- cmd/stellar-rpc/internal/config/options.go | 2 +- cmd/stellar-rpc/internal/ingest/backfill.go | 56 +++++++++++-------- .../integrationtest/infrastructure/test.go | 3 +- 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index 4cdc0da2..f90390b0 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -97,7 +97,7 @@ func (cfg *Config) options() Options { }, { Name: "backfill-timeout", - Usage: "Timeout for backfill database. If not set, defaults to 1 hour per day of ledgers", + Usage: "Timeout for backfilling database. 
If not set, defaults to 1 hour per day of ledgers in your history retention window", ConfigKey: &cfg.BackfillTimeout, DefaultValue: time.Duration(0), Validate: func(_ *Option) error { diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 22ab600f..1620c54d 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -115,11 +115,9 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { return errors.Wrap(err, "could not set backfill bounds") } - // If DB isn't new, fill backwards from local DB tail to the left edge of retention window - if !backfill.dbInfo.isNewDb { - if err := backfill.runBackfill(ctx, bounds); err != nil { - return err - } + // Fill backwards from local DB tail to the left edge of retention window if necessary + if err := backfill.runBackfill(ctx, bounds); err != nil { + return err } // Fill forward from local DB head (or left edge of retention window, if empty) to current tip of datastore @@ -163,39 +161,48 @@ func (backfill *BackfillMeta) setBounds( } backfill.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", currentTipLedger, fillBounds.nBackfill) - // if initial DB empty, skip backwards backfill - if backfill.dbInfo.isNewDb { - fillBounds.frontfill.First = max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) + + fillStart := max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) + minDbSeq := backfill.dbInfo.sequences.First + // if initial DB empty or tail covers edge of filling window, skip backwards backfill + if backfill.dbInfo.isNewDb || fillStart >= minDbSeq { + fillBounds.frontfill.First = fillStart + fillBounds.backfill.First = 1 // indicates backfill phase is skipped } else { if currentTipLedger < backfill.dbInfo.sequences.First { // this would introduce a gap missing ledgers of sequences between the current tip and local DB minimum return backfillBounds{}, errors.New("current datastore tip is older than local DB minimum ledger") } - fillBounds.backfill.First = max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) - fillBounds.backfill.Last = backfill.dbInfo.sequences.First - 1 + fillBounds.backfill.First = fillStart + fillBounds.backfill.Last = minDbSeq - 1 fillBounds.frontfill.First = backfill.dbInfo.sequences.Last + 1 + // set frontfill last to current datastore tip later during frontfill phase } return fillBounds, nil } // Backfills the local DB with older ledgers from newest to oldest within the retention window func (backfill *BackfillMeta) runBackfill(ctx context.Context, bounds backfillBounds) error { - backfill.logger.Infof("Backfilling to the left edge of retention window, ledgers [%d <- %d]", - bounds.backfill.First, bounds.backfill.Last) - if err := backfill.backfillChunks(ctx, &bounds); err != nil { - return errors.Wrap(err, "backfill failed") + if bounds.backfill.First <= bounds.backfill.Last { + backfill.logger.Infof("Backfilling to the left edge of retention window, ledgers [%d <- %d]", + bounds.backfill.First, bounds.backfill.Last) + if err := backfill.backfillChunks(ctx, &bounds); err != nil { + return errors.Wrap(err, "backfill failed") + } + backfill.dbInfo.sequences.First = bounds.backfill.First + backfill.logger.Infof("Backfill of old ledgers complete") + } else { + backfill.logger.Infof("No backfill needed, local DB tail already at retention window edge") } - backfill.dbInfo.sequences.First = 
bounds.backfill.First - backfill.logger.Infof("Backfill of old ledgers complete") - return nil } // Backfills the local DB with older ledgers from oldest to newest within the retention window func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds *backfillBounds) error { numIterations := 1 - // If we skipped backfilling, do a second forwards push to a refreshed current tip - if backfill.dbInfo.isNewDb { + // If we skipped backfilling, we want to fill forwards twice because the latest ledger may be + // significantly further in the future after the first fill completes and fills are faster than catch-up. + if bounds.backfill.First > bounds.backfill.Last { numIterations = 2 } for range numIterations { @@ -231,8 +238,9 @@ func (backfill *BackfillMeta) runPostcheck(ctx context.Context, timeout time.Dur return errors.Wrap(err, "post-backfill verification failed") } if count+ledgerThreshold < nBackfill { - return fmt.Errorf("post-backfill verification failed: expected at least %d ledgers, "+ + backfill.logger.Warnf("post-backfill verification warning: expected at least %d ledgers, "+ "got %d ledgers (exceeds acceptable threshold of %d missing ledgers)", nBackfill, count, ledgerThreshold) + backfill.logger.Warn("You may wish to run backfill again to avoid a long post-backfill catch-up period") } backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", minSeq, maxSeq) return nil @@ -277,6 +285,11 @@ func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds *backfi if err != nil { return errors.Wrap(err, "couldn't create backend") } + defer func() { + if err := tempBackend.Close(); err != nil { + backfill.logger.Warnf("error closing temporary backend: %v", err) + } + }() var lChunkBound uint32 // Underflow check for chunk bounds @@ -296,9 +309,6 @@ func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds *backfi backfill.logger.Infof("Backfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rBound-lChunkBound)/max(rBound-lBound, 1)) - if err := tempBackend.Close(); err != nil { - backfill.logger.Warnf("error closing temporary backend: %v", err) - } if lChunkBound == lBound { break } diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index 3b4d095a..81908394 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -45,6 +45,7 @@ const ( FriendbotURL = "http://localhost:8000/friendbot" // Needed when Core is run with ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING=true checkpointFrequency = 8 + ledgerCloseTime = time.Second // seconds per ledger with accelerated time captiveCoreConfigFilename = "captive-core-integration-tests.cfg" captiveCoreConfigTemplateFilename = captiveCoreConfigFilename + ".tmpl" @@ -336,7 +337,7 @@ func (i *Test) waitForCoreAtLedger(ledger int) { info, err := i.getCoreInfo() return err == nil && info.Info.Ledger.Num >= ledger }, - 90*time.Second, + time.Duration(ledger+5)*time.Second*ledgerCloseTime, time.Second, ) } From 3ce449d5c38ed40f771b41df5127471f088e3bed Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Mon, 26 Jan 2026 12:44:41 -0500 Subject: [PATCH 61/72] improved operator documentation/help and linter errors, removed pass-by-references --- CHANGELOG.md | 2 +- cmd/stellar-rpc/internal/config/options.go | 2 +- cmd/stellar-rpc/internal/db/event.go | 4 +- 
cmd/stellar-rpc/internal/ingest/backfill.go | 37 ++++++++++--------- .../integrationtest/infrastructure/test.go | 2 +- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ac9132f..a6e519d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ go get -u github.com/stellar/go-stellar-sdk/protocols/rpc ``` ### Added -- Added `--backfill` configuration parameter providing synchronous backfilling of `HISTORY_RETENTION_WINDOW` ledgers to the local DB prior to RPC starting ([#571](https://github.com/stellar/stellar-rpc/pull/571)). +- Added `--backfill` configuration parameter providing synchronous backfilling of `HISTORY_RETENTION_WINDOW` ledgers to the local DB prior to RPC starting. For one week of ledgers (approximately 150Gb), this can be expected to complete in under three hours and use <3 Gb of memory (less than core itself). To use this, one must enable a datastore and `SERVE_LEDGERS_FROM_DATASTORE`, which also enables `getLedger` ([#571](https://github.com/stellar/stellar-rpc/pull/571)). - Expanded `getLatestLedger` endpoint to also return `closeTime`, `headerXdr`, and `metadataXdr` ([#554](https://github.com/stellar/stellar-rpc/pull/554)). - Added `soroban-env-host` info to `version` command ([#550](https://github.com/stellar/stellar-rpc/pull/550)). - Added `--network` configuration parameter, allowing users to specify a default Stellar network (`testnet`, `pubnet`, or `futurenet`) ([#540](https://github.com/stellar/stellar-rpc/pull/540), [#543](https://github.com/stellar/stellar-rpc/pull/543)). diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index f90390b0..8bf44597 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -90,7 +90,7 @@ func (cfg *Config) options() Options { Validate: func(_ *Option) error { // Ensure config is valid for backfill if cfg.Backfill && !cfg.ServeLedgersFromDatastore { - return errors.New("backfill requires serving ledgers from datastore to be enabled") + return errors.New("backfill requires serving ledgers from datastore to be enabled. See the `--serve-ledgers-from-datastore` flag") } return nil }, diff --git a/cmd/stellar-rpc/internal/db/event.go b/cmd/stellar-rpc/internal/db/event.go index 9a61c665..d413252f 100644 --- a/cmd/stellar-rpc/internal/db/event.go +++ b/cmd/stellar-rpc/internal/db/event.go @@ -104,8 +104,8 @@ func (eventHandler *eventHandler) InsertEvents(lcm xdr.LedgerCloseMeta) error { // where -1 is actually the largest possible uint32. 
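The requirement stated above, that --backfill only works when ledgers are served from the datastore, is enforced in the option's Validate hook. A stripped-down sketch of that kind of cross-field check follows; the Config and Option types here are simplified stand-ins for the real ones in internal/config.

package main

import (
	"errors"
	"fmt"
)

// Simplified stand-ins for the real config types.
type Config struct {
	Backfill                  bool
	ServeLedgersFromDatastore bool
}

type Option struct {
	Name     string
	Validate func() error
}

func backfillOption(cfg *Config) Option {
	return Option{
		Name: "backfill",
		Validate: func() error {
			// Backfill reads ledgers from the cloud datastore, so it cannot run without it.
			if cfg.Backfill && !cfg.ServeLedgersFromDatastore {
				return errors.New("backfill requires serving ledgers from datastore to be enabled")
			}
			return nil
		},
	}
}

func main() {
	cfg := &Config{Backfill: true, ServeLedgersFromDatastore: false}
	if err := backfillOption(cfg).Validate(); err != nil {
		fmt.Println("invalid config:", err) // rejected until --serve-ledgers-from-datastore is set
	}
}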
// var beforeIndex, afterIndex uint32 - // Accumulate all ledger events to insert - var allLedgerEvents []dbEvent + // Pre-size buffer: empirically about 16 Gb of events per Gb of transaction (0.8:13.7 tx:event ratio) + allLedgerEvents := make([]dbEvent, 0, txCount*16) for { var tx ingest.LedgerTransaction diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 1620c54d..3128edd5 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -116,12 +116,12 @@ func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { } // Fill backwards from local DB tail to the left edge of retention window if necessary - if err := backfill.runBackfill(ctx, bounds); err != nil { + if bounds, err = backfill.runBackfill(ctx, bounds); err != nil { return err } // Fill forward from local DB head (or left edge of retention window, if empty) to current tip of datastore - if err := backfill.runFrontfill(ctx, &bounds); err != nil { + if bounds, err = backfill.runFrontfill(ctx, bounds); err != nil { return err } @@ -182,23 +182,24 @@ func (backfill *BackfillMeta) setBounds( } // Backfills the local DB with older ledgers from newest to oldest within the retention window -func (backfill *BackfillMeta) runBackfill(ctx context.Context, bounds backfillBounds) error { +func (backfill *BackfillMeta) runBackfill(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { + var err error if bounds.backfill.First <= bounds.backfill.Last { backfill.logger.Infof("Backfilling to the left edge of retention window, ledgers [%d <- %d]", bounds.backfill.First, bounds.backfill.Last) - if err := backfill.backfillChunks(ctx, &bounds); err != nil { - return errors.Wrap(err, "backfill failed") + if bounds, err = backfill.backfillChunks(ctx, bounds); err != nil { + return backfillBounds{}, errors.Wrap(err, "backfill failed") } backfill.dbInfo.sequences.First = bounds.backfill.First backfill.logger.Infof("Backfill of old ledgers complete") } else { backfill.logger.Infof("No backfill needed, local DB tail already at retention window edge") } - return nil + return bounds, nil } // Backfills the local DB with older ledgers from oldest to newest within the retention window -func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds *backfillBounds) error { +func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { numIterations := 1 // If we skipped backfilling, we want to fill forwards twice because the latest ledger may be // significantly further in the future after the first fill completes and fills are faster than catch-up. 
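The change above moves runBackfill, runFrontfill, and the chunk helpers away from mutating a shared *backfillBounds and toward taking the struct by value and returning the updated copy, so every bounds update is visible at the call site. A minimal sketch of that calling convention follows; the span type and advance function are hypothetical stand-ins, not the real backfillBounds API.

package main

import "fmt"

// span is a hypothetical, trimmed-down stand-in for the real backfillBounds type.
type span struct {
	First, Last uint32
}

// advance takes the bounds by value and returns the updated copy,
// so callers reassign the result instead of relying on hidden mutation.
func advance(b span, newLast uint32) (span, error) {
	if newLast < b.First {
		return span{}, fmt.Errorf("new last %d precedes first %d", newLast, b.First)
	}
	b.Last = newLast
	return b, nil
}

func main() {
	bounds := span{First: 100, Last: 150}
	bounds, err := advance(bounds, 200) // reassign, mirroring bounds, err = backfill.runBackfill(ctx, bounds)
	if err != nil {
		panic(err)
	}
	fmt.Printf("bounds are now [%d, %d]\n", bounds.First, bounds.Last)
}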
@@ -207,7 +208,7 @@ func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds *backfill } for range numIterations { if err := backfill.dsInfo.getLatestSeqInCDP(ctx); err != nil { - return errors.Wrap(err, "could not get latest ledger number from cloud datastore") + return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") } bounds.frontfill.Last = backfill.dsInfo.sequences.Last bounds.frontfill.Last = bounds.checkpointAligner.PrevCheckpoint(bounds.frontfill.Last) @@ -215,7 +216,7 @@ func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds *backfill backfill.logger.Infof("Frontfilling to the current datastore tip, ledgers [%d -> %d]", bounds.frontfill.First, bounds.frontfill.Last) if err := backfill.frontfillChunks(ctx, bounds); err != nil { - return errors.Wrap(err, "frontfill failed") + return backfillBounds{}, errors.Wrap(err, "frontfill failed") } } else { backfill.logger.Infof("No frontfill needed, local DB head already at datastore tip") @@ -226,7 +227,7 @@ func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds *backfill } backfill.dbInfo.sequences.Last = max(bounds.frontfill.Last, backfill.dbInfo.sequences.Last) backfill.logger.Infof("Forward backfill of recent ledgers complete") - return nil + return bounds, nil } // Verifies backfilled ledgers are gapless and meet retention window requirements @@ -273,17 +274,17 @@ func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time. // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers (starting from newest) -func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds *backfillBounds) error { +func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { lBound, rBound := bounds.backfill.First, bounds.backfill.Last for i, rChunkBound := 0, rBound; rChunkBound >= lBound; i++ { if err := ctx.Err(); err != nil { - return err + return backfillBounds{}, err } // Create temporary backend for backward-filling chunks // Note monotonicity constraint of the ledger backend tempBackend, err := makeBackend(backfill.dsInfo) if err != nil { - return errors.Wrap(err, "couldn't create backend") + return backfillBounds{}, errors.Wrap(err, "couldn't create backend") } defer func() { if err := tempBackend.Close(); err != nil { @@ -301,10 +302,10 @@ func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds *backfi backfill.logger.Infof("Backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) if err := tempBackend.PrepareRange(ctx, chunkRange); err != nil { - return err + return backfillBounds{}, err } if err := backfill.ingestService.ingestRange(ctx, tempBackend, chunkRange); err != nil { - return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) + return backfillBounds{}, errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } backfill.logger.Infof("Backfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rBound-lChunkBound)/max(rBound-lBound, 1)) @@ -316,18 +317,18 @@ func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds *backfi // Refresh lBound periodically to account for ledgers coming into the datastore if i > 0 && i%10 == 0 { if err := backfill.dsInfo.getLatestSeqInCDP(ctx); err != nil { - return err + return 
backfillBounds{}, err } lBound = max(backfill.dsInfo.sequences.Last-bounds.nBackfill+1, backfill.dsInfo.sequences.First) } } bounds.backfill.First = lBound - return nil + return bounds, nil } // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB forwards towards the current ledger tip -func (backfill *BackfillMeta) frontfillChunks(ctx context.Context, bounds *backfillBounds) error { +func (backfill *BackfillMeta) frontfillChunks(ctx context.Context, bounds backfillBounds) error { lBound, rBound := bounds.frontfill.First, bounds.frontfill.Last // Backend for frontfill can be persistent over multiple chunks backend, err := makeBackend(backfill.dsInfo) diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index 81908394..e7ae927d 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -337,7 +337,7 @@ func (i *Test) waitForCoreAtLedger(ledger int) { info, err := i.getCoreInfo() return err == nil && info.Info.Ledger.Num >= ledger }, - time.Duration(ledger+5)*time.Second*ledgerCloseTime, + time.Duration(ledger+5)*ledgerCloseTime, time.Second, ) } From 4f59e3a6a4e232a7cd47a3f0c1d28c68c2264b9f Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Mon, 26 Jan 2026 19:16:41 -0500 Subject: [PATCH 62/72] fixed bounds-setting edge case --- cmd/stellar-rpc/internal/ingest/backfill.go | 37 +++++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 3128edd5..5c90509b 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -159,25 +159,41 @@ func (backfill *BackfillMeta) setBounds( nBackfill: min(retentionWindow, currentTipLedger), checkpointAligner: checkpoint.NewCheckpointManager(checkpointFrequency), } - backfill.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", - currentTipLedger, fillBounds.nBackfill) - fillStart := max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) - minDbSeq := backfill.dbInfo.sequences.First + // Determine starting ledger to fill from + var fillStartMin uint32 // minimum possible ledger to start from + if currentTipLedger >= fillBounds.nBackfill+1 { + fillStartMin = max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) + } else { + fillStartMin = backfill.dsInfo.sequences.First + } + // fillStart := max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) + minDbSeq, maxDbSeq := backfill.dbInfo.sequences.First, backfill.dbInfo.sequences.Last + var fillCount uint32 // if initial DB empty or tail covers edge of filling window, skip backwards backfill - if backfill.dbInfo.isNewDb || fillStart >= minDbSeq { - fillBounds.frontfill.First = fillStart + if backfill.dbInfo.isNewDb || minDbSeq <= fillStartMin { + if backfill.dbInfo.isNewDb { + fillBounds.frontfill.First = fillStartMin + fillCount = currentTipLedger - fillStartMin + 1 + } else { + // DB tail already covers left edge of retention window + fillBounds.frontfill.First = maxDbSeq + 1 + fillCount = currentTipLedger - maxDbSeq + } fillBounds.backfill.First = 1 // indicates backfill phase is skipped } else { if currentTipLedger < backfill.dbInfo.sequences.First { // this would introduce a gap missing ledgers of 
sequences between the current tip and local DB minimum return backfillBounds{}, errors.New("current datastore tip is older than local DB minimum ledger") } - fillBounds.backfill.First = fillStart + fillBounds.backfill.First = fillStartMin fillBounds.backfill.Last = minDbSeq - 1 - fillBounds.frontfill.First = backfill.dbInfo.sequences.Last + 1 + fillBounds.frontfill.First = maxDbSeq + 1 + fillCount = fillBounds.nBackfill - (maxDbSeq - minDbSeq + 1) // set frontfill last to current datastore tip later during frontfill phase } + backfill.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", + currentTipLedger, fillCount) return fillBounds, nil } @@ -221,9 +237,8 @@ func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds backfillB } else { backfill.logger.Infof("No frontfill needed, local DB head already at datastore tip") } - if backfill.dbInfo.isNewDb { - bounds.frontfill.First = bounds.frontfill.Last + 1 - } + // Update frontfill.First for next iteration (if any) + bounds.frontfill.First = bounds.frontfill.Last + 1 } backfill.dbInfo.sequences.Last = max(bounds.frontfill.Last, backfill.dbInfo.sequences.Last) backfill.logger.Infof("Forward backfill of recent ledgers complete") From 5d9570dd2cc791b123890a1651dfffb480e7736b Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 27 Jan 2026 11:41:01 -0500 Subject: [PATCH 63/72] undid events changes --- .dockerignore | 1 + cmd/stellar-rpc/internal/db/event.go | 51 +++++++++++---------- cmd/stellar-rpc/internal/ingest/backfill.go | 4 +- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/.dockerignore b/.dockerignore index be3f035d..eaa59cdc 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,4 @@ target/ +storage/ .soroban/ .cargo/ diff --git a/cmd/stellar-rpc/internal/db/event.go b/cmd/stellar-rpc/internal/db/event.go index d413252f..feb287d6 100644 --- a/cmd/stellar-rpc/internal/db/event.go +++ b/cmd/stellar-rpc/internal/db/event.go @@ -104,8 +104,6 @@ func (eventHandler *eventHandler) InsertEvents(lcm xdr.LedgerCloseMeta) error { // where -1 is actually the largest possible uint32. // var beforeIndex, afterIndex uint32 - // Pre-size buffer: empirically about 16 Gb of events per Gb of transaction (0.8:13.7 tx:event ratio) - allLedgerEvents := make([]dbEvent, 0, txCount*16) for { var tx ingest.LedgerTransaction @@ -127,6 +125,7 @@ func (eventHandler *eventHandler) InsertEvents(lcm xdr.LedgerCloseMeta) error { opEvents := allEvents.OperationEvents txEvents := allEvents.TransactionEvents + insertableEvents := make([]dbEvent, 0, len(txEvents)+len(opEvents)) var afterTxIndex uint32 @@ -174,13 +173,13 @@ func (eventHandler *eventHandler) InsertEvents(lcm xdr.LedgerCloseMeta) error { return err } - allLedgerEvents = append(allLedgerEvents, insertedEvent) + insertableEvents = append(insertableEvents, insertedEvent) } // Then, gather all of the operation events. for opIndex, innerOpEvents := range opEvents { for eventIndex, event := range innerOpEvents { - allLedgerEvents = append(allLedgerEvents, dbEvent{ + insertableEvents = append(insertableEvents, dbEvent{ TxHash: tx.Hash, Event: xdr.DiagnosticEvent{ InSuccessfulContractCall: tx.Successful(), @@ -195,31 +194,35 @@ func (eventHandler *eventHandler) InsertEvents(lcm xdr.LedgerCloseMeta) error { }) } } - } - if len(allLedgerEvents) == 0 { - return nil - } + query := sq.Insert(eventTableName). 
+ Columns( + "id", + "contract_id", + "event_type", + "event_data", + "ledger_close_time", + "transaction_hash", + "topic1", "topic2", "topic3", "topic4", + ) - query := sq.Insert(eventTableName). - Columns( - "id", - "contract_id", - "event_type", - "event_data", - "ledger_close_time", - "transaction_hash", - "topic1", "topic2", "topic3", "topic4", - ) + for _, event := range insertableEvents { + query, err = insertEvents(query, lcm, event) + if err != nil { + return err + } + } - for _, event := range allLedgerEvents { - query, err = insertEvents(query, lcm, event) - if err != nil { - return err + if len(insertableEvents) > 0 { // don't run empty insert + // Ignore the last inserted ID as it is not needed + _, err = query.RunWith(eventHandler.stmtCache).Exec() + if err != nil { + return err + } } } - _, err = query.RunWith(eventHandler.stmtCache).Exec() - return err + + return nil } func insertEvents( diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 5c90509b..d7e4aa2b 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -314,7 +314,7 @@ func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds backfil } else { lChunkBound = lBound } - backfill.logger.Infof("Backfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) + backfill.logger.Infof("Backfill: filling ledgers [%d, %d]", lChunkBound, rChunkBound) chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) if err := tempBackend.PrepareRange(ctx, chunkRange); err != nil { return backfillBounds{}, err @@ -367,7 +367,7 @@ func (backfill *BackfillMeta) frontfillChunks(ctx context.Context, bounds backfi rChunkBound := min(rBound, lChunkBound+ChunkSize-1) chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) - backfill.logger.Infof("Frontfill: backfilling ledgers [%d, %d]", lChunkBound, rChunkBound) + backfill.logger.Infof("Frontfill: filling ledgers [%d, %d]", lChunkBound, rChunkBound) if err := backfill.ingestService.ingestRange(ctx, backend, chunkRange); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } From 438e757a6b61d53bea00864c101b502c4d03de74 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 27 Jan 2026 14:08:13 -0500 Subject: [PATCH 64/72] changed method receiver names --- cmd/stellar-rpc/internal/ingest/backfill.go | 127 ++++++++++---------- 1 file changed, 63 insertions(+), 64 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index d7e4aa2b..f21f8fb4 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -102,59 +102,59 @@ func NewBackfillMeta( // This function backfills the local database with ledgers from the datastore // It guarantees the backfill of the most recent cfg.HistoryRetentionWindow ledgers // Requires that no sequence number gaps exist in the local DB prior to backfilling -func (backfill *BackfillMeta) RunBackfill(cfg *config.Config) error { +func (b *BackfillMeta) RunBackfill(cfg *config.Config) error { ctx, cancelBackfill := context.WithTimeout(context.Background(), cfg.BackfillTimeout) defer cancelBackfill() // Ensure no pre-existing gaps in local DB - if err := backfill.runCheckNoGaps(ctx, cfg.IngestionTimeout); err != nil { + if err := b.runCheckNoGaps(ctx, cfg.IngestionTimeout); err != nil { return err } - bounds, err := backfill.setBounds(ctx, cfg.HistoryRetentionWindow, 
cfg.CheckpointFrequency) + bounds, err := b.setBounds(ctx, cfg.HistoryRetentionWindow, cfg.CheckpointFrequency) if err != nil { return errors.Wrap(err, "could not set backfill bounds") } // Fill backwards from local DB tail to the left edge of retention window if necessary - if bounds, err = backfill.runBackfill(ctx, bounds); err != nil { + if bounds, err = b.runBackfill(ctx, bounds); err != nil { return err } // Fill forward from local DB head (or left edge of retention window, if empty) to current tip of datastore - if bounds, err = backfill.runFrontfill(ctx, bounds); err != nil { + if bounds, err = b.runFrontfill(ctx, bounds); err != nil { return err } // Ensure no gaps introduced and retention window requirements met - return backfill.runPostcheck(ctx, cfg.IngestionTimeout, bounds.nBackfill) + return b.runPostcheck(ctx, cfg.IngestionTimeout, bounds.nBackfill) } // Ensures local DB is gapless prior to backfilling -func (backfill *BackfillMeta) runCheckNoGaps(ctx context.Context, timeout time.Duration) error { - backfill.logger.Infof("Starting initialization/precheck for backfilling the local database") - if !backfill.dbInfo.isNewDb { - if _, _, err := backfill.verifyDbGapless(ctx, timeout); err != nil { +func (b *BackfillMeta) runCheckNoGaps(ctx context.Context, timeout time.Duration) error { + b.logger.Infof("Starting initialization/precheck for backfilling the local database") + if !b.dbInfo.isNewDb { + if _, _, err := b.verifyDbGapless(ctx, timeout); err != nil { return errors.Wrap(err, "backfill precheck failed") } } else { - backfill.logger.Infof("Local DB is empty, skipping precheck") + b.logger.Infof("Local DB is empty, skipping precheck") } - backfill.logger.Infof("Precheck and initialization passed, no gaps detected in local DB") + b.logger.Infof("Precheck and initialization passed, no gaps detected in local DB") return nil } // Sets the bounds for backfill and frontfill phases, determines number of ledgers to backfill, and // whether backfill phase should be skipped -func (backfill *BackfillMeta) setBounds( +func (b *BackfillMeta) setBounds( ctx context.Context, retentionWindow uint32, checkpointFrequency uint32, ) (backfillBounds, error) { // Determine bounds for ledgers to be written to local DB in backfill and frontfill phases - if err := backfill.dsInfo.getLatestSeqInCDP(ctx); err != nil { + if err := b.dsInfo.getLatestSeqInCDP(ctx); err != nil { return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - currentTipLedger := backfill.dsInfo.sequences.Last + currentTipLedger := b.dsInfo.sequences.Last fillBounds := backfillBounds{ nBackfill: min(retentionWindow, currentTipLedger), checkpointAligner: checkpoint.NewCheckpointManager(checkpointFrequency), @@ -163,16 +163,15 @@ func (backfill *BackfillMeta) setBounds( // Determine starting ledger to fill from var fillStartMin uint32 // minimum possible ledger to start from if currentTipLedger >= fillBounds.nBackfill+1 { - fillStartMin = max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) + fillStartMin = max(currentTipLedger-fillBounds.nBackfill+1, b.dsInfo.sequences.First) } else { - fillStartMin = backfill.dsInfo.sequences.First + fillStartMin = b.dsInfo.sequences.First } - // fillStart := max(currentTipLedger-fillBounds.nBackfill+1, backfill.dsInfo.sequences.First) - minDbSeq, maxDbSeq := backfill.dbInfo.sequences.First, backfill.dbInfo.sequences.Last + minDbSeq, maxDbSeq := b.dbInfo.sequences.First, b.dbInfo.sequences.Last var fillCount uint32 // if 
initial DB empty or tail covers edge of filling window, skip backwards backfill - if backfill.dbInfo.isNewDb || minDbSeq <= fillStartMin { - if backfill.dbInfo.isNewDb { + if b.dbInfo.isNewDb || minDbSeq <= fillStartMin { + if b.dbInfo.isNewDb { fillBounds.frontfill.First = fillStartMin fillCount = currentTipLedger - fillStartMin + 1 } else { @@ -182,7 +181,7 @@ func (backfill *BackfillMeta) setBounds( } fillBounds.backfill.First = 1 // indicates backfill phase is skipped } else { - if currentTipLedger < backfill.dbInfo.sequences.First { + if currentTipLedger < b.dbInfo.sequences.First { // this would introduce a gap missing ledgers of sequences between the current tip and local DB minimum return backfillBounds{}, errors.New("current datastore tip is older than local DB minimum ledger") } @@ -192,30 +191,30 @@ func (backfill *BackfillMeta) setBounds( fillCount = fillBounds.nBackfill - (maxDbSeq - minDbSeq + 1) // set frontfill last to current datastore tip later during frontfill phase } - backfill.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", + b.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", currentTipLedger, fillCount) return fillBounds, nil } // Backfills the local DB with older ledgers from newest to oldest within the retention window -func (backfill *BackfillMeta) runBackfill(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { +func (b *BackfillMeta) runBackfill(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { var err error if bounds.backfill.First <= bounds.backfill.Last { - backfill.logger.Infof("Backfilling to the left edge of retention window, ledgers [%d <- %d]", + b.logger.Infof("Backfilling to the left edge of retention window, ledgers [%d <- %d]", bounds.backfill.First, bounds.backfill.Last) - if bounds, err = backfill.backfillChunks(ctx, bounds); err != nil { + if bounds, err = b.backfillChunks(ctx, bounds); err != nil { return backfillBounds{}, errors.Wrap(err, "backfill failed") } - backfill.dbInfo.sequences.First = bounds.backfill.First - backfill.logger.Infof("Backfill of old ledgers complete") + b.dbInfo.sequences.First = bounds.backfill.First + b.logger.Infof("Backfill of old ledgers complete") } else { - backfill.logger.Infof("No backfill needed, local DB tail already at retention window edge") + b.logger.Infof("No backfill needed, local DB tail already at retention window edge") } return bounds, nil } // Backfills the local DB with older ledgers from oldest to newest within the retention window -func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { +func (b *BackfillMeta) runFrontfill(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { numIterations := 1 // If we skipped backfilling, we want to fill forwards twice because the latest ledger may be // significantly further in the future after the first fill completes and fills are faster than catch-up. 
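The comment just above explains why the forward fill runs twice when the backward pass is skipped: the datastore tip keeps advancing while the first long pass runs, and a second, much shorter pass closes most of that gap before normal catch-up ingestion takes over. A compact sketch of that loop follows; latestInDatastore and ingestRange are hypothetical helpers standing in for the real datastore and ingestion calls, and the error handling is condensed.

package sketch

import "context"

// frontfillPasses runs the forward fill the requested number of times, re-reading the
// datastore tip before each pass so later passes only cover newly published ledgers.
func frontfillPasses(
	ctx context.Context,
	first uint32,
	passes int,
	latestInDatastore func(context.Context) (uint32, error),
	ingestRange func(ctx context.Context, from, to uint32) error,
) error {
	for i := 0; i < passes; i++ {
		tip, err := latestInDatastore(ctx) // the tip has moved since the previous pass
		if err != nil {
			return err
		}
		if first <= tip {
			if err := ingestRange(ctx, first, tip); err != nil {
				return err
			}
		}
		first = tip + 1 // the next pass starts where this one ended
	}
	return nil
}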
@@ -223,60 +222,60 @@ func (backfill *BackfillMeta) runFrontfill(ctx context.Context, bounds backfillB numIterations = 2 } for range numIterations { - if err := backfill.dsInfo.getLatestSeqInCDP(ctx); err != nil { + if err := b.dsInfo.getLatestSeqInCDP(ctx); err != nil { return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - bounds.frontfill.Last = backfill.dsInfo.sequences.Last + bounds.frontfill.Last = b.dsInfo.sequences.Last bounds.frontfill.Last = bounds.checkpointAligner.PrevCheckpoint(bounds.frontfill.Last) if bounds.frontfill.First < bounds.frontfill.Last { - backfill.logger.Infof("Frontfilling to the current datastore tip, ledgers [%d -> %d]", + b.logger.Infof("Frontfilling to the current datastore tip, ledgers [%d -> %d]", bounds.frontfill.First, bounds.frontfill.Last) - if err := backfill.frontfillChunks(ctx, bounds); err != nil { + if err := b.frontfillChunks(ctx, bounds); err != nil { return backfillBounds{}, errors.Wrap(err, "frontfill failed") } } else { - backfill.logger.Infof("No frontfill needed, local DB head already at datastore tip") + b.logger.Infof("No frontfill needed, local DB head already at datastore tip") } // Update frontfill.First for next iteration (if any) bounds.frontfill.First = bounds.frontfill.Last + 1 } - backfill.dbInfo.sequences.Last = max(bounds.frontfill.Last, backfill.dbInfo.sequences.Last) - backfill.logger.Infof("Forward backfill of recent ledgers complete") + b.dbInfo.sequences.Last = max(bounds.frontfill.Last, b.dbInfo.sequences.Last) + b.logger.Infof("Forward backfill of recent ledgers complete") return bounds, nil } // Verifies backfilled ledgers are gapless and meet retention window requirements -func (backfill *BackfillMeta) runPostcheck(ctx context.Context, timeout time.Duration, nBackfill uint32) error { - backfill.logger.Infof("Starting post-backfill verification") - minSeq, maxSeq, err := backfill.verifyDbGapless(ctx, timeout) +func (b *BackfillMeta) runPostcheck(ctx context.Context, timeout time.Duration, nBackfill uint32) error { + b.logger.Infof("Starting post-backfill verification") + minSeq, maxSeq, err := b.verifyDbGapless(ctx, timeout) count := maxSeq - minSeq + 1 if err != nil { return errors.Wrap(err, "post-backfill verification failed") } if count+ledgerThreshold < nBackfill { - backfill.logger.Warnf("post-backfill verification warning: expected at least %d ledgers, "+ + b.logger.Warnf("post-backfill verification warning: expected at least %d ledgers, "+ "got %d ledgers (exceeds acceptable threshold of %d missing ledgers)", nBackfill, count, ledgerThreshold) - backfill.logger.Warn("You may wish to run backfill again to avoid a long post-backfill catch-up period") + b.logger.Warn("You may wish to run backfill again to avoid a long post-backfill catch-up period") } - backfill.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", minSeq, maxSeq) + b.logger.Infof("Backfill process complete, ledgers [%d -> %d] are now in local DB", minSeq, maxSeq) return nil } // Checks to ensure state of local DB is acceptable for backfilling -func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time.Duration) (uint32, uint32, error) { +func (b *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time.Duration) (uint32, uint32, error) { ctx, cancelCheckNoGaps := context.WithTimeout(ctx, timeout) defer cancelCheckNoGaps() - ledgerRange, err := backfill.dbInfo.reader.GetLedgerRange(ctx) + ledgerRange, err := b.dbInfo.reader.GetLedgerRange(ctx) 
if err != nil { return 0, 0, errors.Wrap(err, "db verify: could not get ledger range") } // Get sequence number of highest/lowest ledgers in local DB minDbSeq, maxDbSeq := ledgerRange.FirstLedger.Sequence, ledgerRange.LastLedger.Sequence - backfill.logger.Debugf("DB verify: checking for gaps in [%d, %d]", + b.logger.Debugf("DB verify: checking for gaps in [%d, %d]", minDbSeq, maxDbSeq) expectedCount := maxDbSeq - minDbSeq + 1 - count, sequencesMin, sequencesMax, err := backfill.dbInfo.reader.GetLedgerCountInRange(ctx, minDbSeq, maxDbSeq) + count, sequencesMin, sequencesMax, err := b.dbInfo.reader.GetLedgerCountInRange(ctx, minDbSeq, maxDbSeq) if err != nil { return 0, 0, errors.Wrap(err, "db verify: could not get ledger sequences in local DB") } @@ -289,7 +288,7 @@ func (backfill *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time. // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers (starting from newest) -func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { +func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { lBound, rBound := bounds.backfill.First, bounds.backfill.Last for i, rChunkBound := 0, rBound; rChunkBound >= lBound; i++ { if err := ctx.Err(); err != nil { @@ -297,13 +296,13 @@ func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds backfil } // Create temporary backend for backward-filling chunks // Note monotonicity constraint of the ledger backend - tempBackend, err := makeBackend(backfill.dsInfo) + tempBackend, err := makeBackend(b.dsInfo) if err != nil { return backfillBounds{}, errors.Wrap(err, "couldn't create backend") } defer func() { if err := tempBackend.Close(); err != nil { - backfill.logger.Warnf("error closing temporary backend: %v", err) + b.logger.Warnf("error closing temporary backend: %v", err) } }() @@ -314,15 +313,15 @@ func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds backfil } else { lChunkBound = lBound } - backfill.logger.Infof("Backfill: filling ledgers [%d, %d]", lChunkBound, rChunkBound) + b.logger.Infof("Backfill: filling ledgers [%d, %d]", lChunkBound, rChunkBound) chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) if err := tempBackend.PrepareRange(ctx, chunkRange); err != nil { return backfillBounds{}, err } - if err := backfill.ingestService.ingestRange(ctx, tempBackend, chunkRange); err != nil { + if err := b.ingestService.ingestRange(ctx, tempBackend, chunkRange); err != nil { return backfillBounds{}, errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } - backfill.logger.Infof("Backfill: committed ledgers [%d, %d]; %d%% done", + b.logger.Infof("Backfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rBound-lChunkBound)/max(rBound-lBound, 1)) if lChunkBound == lBound { @@ -331,10 +330,10 @@ func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds backfil rChunkBound = lChunkBound - 1 // Refresh lBound periodically to account for ledgers coming into the datastore if i > 0 && i%10 == 0 { - if err := backfill.dsInfo.getLatestSeqInCDP(ctx); err != nil { + if err := b.dsInfo.getLatestSeqInCDP(ctx); err != nil { return backfillBounds{}, err } - lBound = max(backfill.dsInfo.sequences.Last-bounds.nBackfill+1, backfill.dsInfo.sequences.First) + lBound = max(b.dsInfo.sequences.Last-bounds.nBackfill+1, 
b.dsInfo.sequences.First) } } bounds.backfill.First = lBound @@ -343,16 +342,16 @@ func (backfill *BackfillMeta) backfillChunks(ctx context.Context, bounds backfil // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB forwards towards the current ledger tip -func (backfill *BackfillMeta) frontfillChunks(ctx context.Context, bounds backfillBounds) error { +func (b *BackfillMeta) frontfillChunks(ctx context.Context, bounds backfillBounds) error { lBound, rBound := bounds.frontfill.First, bounds.frontfill.Last // Backend for frontfill can be persistent over multiple chunks - backend, err := makeBackend(backfill.dsInfo) + backend, err := makeBackend(b.dsInfo) if err != nil { return errors.Wrap(err, "could not create ledger backend") } defer func() { if err := backend.Close(); err != nil { - backfill.logger.Warnf("error closing ledger backend: %v", err) + b.logger.Warnf("error closing ledger backend: %v", err) } }() @@ -367,17 +366,17 @@ func (backfill *BackfillMeta) frontfillChunks(ctx context.Context, bounds backfi rChunkBound := min(rBound, lChunkBound+ChunkSize-1) chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) - backfill.logger.Infof("Frontfill: filling ledgers [%d, %d]", lChunkBound, rChunkBound) - if err := backfill.ingestService.ingestRange(ctx, backend, chunkRange); err != nil { + b.logger.Infof("Frontfill: filling ledgers [%d, %d]", lChunkBound, rChunkBound) + if err := b.ingestService.ingestRange(ctx, backend, chunkRange); err != nil { return errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } - backfill.logger.Infof("Frontfill: committed ledgers [%d, %d]; %d%% done", + b.logger.Infof("Frontfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rChunkBound-lBound)/max(rBound-lBound, 1)) } return nil } -// Creates a buffered storage backend for the given datastore +// Returns a buffered storage backend for the given datastore func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { ledgersPerFile := dsInfo.schema.LedgersPerFile bufferSize := max(1024/ledgersPerFile, 10) // use fewer files if many ledgers per file @@ -395,12 +394,12 @@ func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { } // Gets the latest ledger number stored in the cloud Datastore/datalake and updates datastoreInfo.sequences.Last -func (dsInfo *datastoreInfo) getLatestSeqInCDP(callerCtx context.Context) error { +func (d *datastoreInfo) getLatestSeqInCDP(callerCtx context.Context) error { ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) defer cancelRunBackfill() var err error - dsInfo.sequences.Last, err = datastore.FindLatestLedgerSequence(ctx, dsInfo.ds) + d.sequences.Last, err = datastore.FindLatestLedgerSequence(ctx, d.ds) if err != nil { return errors.Wrap(err, "could not get latest ledger sequence from datastore") } From 7805f023ae55bd0a233cdb4ebef765148a7ccc3e Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 27 Jan 2026 14:40:26 -0500 Subject: [PATCH 65/72] removed timeout configurability --- cmd/stellar-rpc/internal/config/main.go | 1 - cmd/stellar-rpc/internal/config/options.go | 13 ------------- cmd/stellar-rpc/internal/ingest/backfill.go | 3 ++- .../internal/integrationtest/backfill_test.go | 3 +-- .../internal/integrationtest/infrastructure/test.go | 7 ------- 5 files changed, 3 insertions(+), 24 deletions(-) diff --git a/cmd/stellar-rpc/internal/config/main.go b/cmd/stellar-rpc/internal/config/main.go index 
776c3e72..c74d92ad 100644 --- a/cmd/stellar-rpc/internal/config/main.go +++ b/cmd/stellar-rpc/internal/config/main.go @@ -29,7 +29,6 @@ type Config struct { Endpoint string AdminEndpoint string - BackfillTimeout time.Duration CheckpointFrequency uint32 CoreRequestTimeout time.Duration DefaultEventsLimit uint diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go index 8bf44597..909e2939 100644 --- a/cmd/stellar-rpc/internal/config/options.go +++ b/cmd/stellar-rpc/internal/config/options.go @@ -95,19 +95,6 @@ func (cfg *Config) options() Options { return nil }, }, - { - Name: "backfill-timeout", - Usage: "Timeout for backfilling database. If not set, defaults to 1 hour per day of ledgers in your history retention window", - ConfigKey: &cfg.BackfillTimeout, - DefaultValue: time.Duration(0), - Validate: func(_ *Option) error { - if cfg.BackfillTimeout == time.Duration(0) { - hours := max(cfg.HistoryRetentionWindow/OneDayOfLedgers, 1) - cfg.BackfillTimeout = time.Duration(hours) * time.Hour - } - return nil - }, - }, { Name: "stellar-core-timeout", Usage: "Timeout used when submitting requests to stellar-core", diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index f21f8fb4..326b24bb 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -103,7 +103,8 @@ func NewBackfillMeta( // It guarantees the backfill of the most recent cfg.HistoryRetentionWindow ledgers // Requires that no sequence number gaps exist in the local DB prior to backfilling func (b *BackfillMeta) RunBackfill(cfg *config.Config) error { - ctx, cancelBackfill := context.WithTimeout(context.Background(), cfg.BackfillTimeout) + timeout := max(cfg.HistoryRetentionWindow/config.OneDayOfLedgers, 1) + ctx, cancelBackfill := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Hour) defer cancelBackfill() // Ensure no pre-existing gaps in local DB diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index c08238e1..84c123d0 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -64,8 +64,7 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint DatastoreConfigFunc: makeDatastoreConfig, NoParallel: true, // can't use parallel due to env vars DelayDaemonForLedgerN: int(datastoreEnd), // stops daemon start until core has at least the datastore ledgers - BackfillTimeout: 4 * time.Minute, - IgnoreLedgerCloseTimes: true, // artificially seeded ledgers don't need correct close times relative to core's + IgnoreLedgerCloseTimes: true, // artificially seeded ledgers don't need correct close times relative to core's }) testDb := test.GetDaemon().GetDB() diff --git a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go index e7ae927d..3a08998c 100644 --- a/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go +++ b/cmd/stellar-rpc/internal/integrationtest/infrastructure/test.go @@ -89,7 +89,6 @@ type TestConfig struct { // empty string to skip upgrading altogether. 
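With the --backfill-timeout flag removed, the deadline is now derived from the retention window: roughly one hour per day of retained ledgers, with a one-hour floor, exactly as in the RunBackfill hunk above. The sketch below reproduces that arithmetic; the 17280 ledgers-per-day constant is an assumption based on the usual ~5 second close time, not a value quoted from this patch.

package main

import (
	"context"
	"fmt"
	"time"
)

// oneDayOfLedgers assumes roughly 5s per ledger, i.e. 86400/5 = 17280 ledgers per day.
const oneDayOfLedgers = 17280

// backfillDeadline mirrors the derived timeout: one hour per day of retained ledgers, minimum one hour.
func backfillDeadline(retentionWindow uint32) time.Duration {
	hours := max(retentionWindow/oneDayOfLedgers, 1)
	return time.Duration(hours) * time.Hour
}

func main() {
	retention := uint32(7 * oneDayOfLedgers) // one week of ledgers
	ctx, cancel := context.WithTimeout(context.Background(), backfillDeadline(retention))
	defer cancel()

	deadline, _ := ctx.Deadline()
	fmt.Println("backfill must finish by", deadline) // about 7 hours from now
}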
ApplyLimits *string - BackfillTimeout time.Duration IgnoreLedgerCloseTimes bool // disregard close times when ingesting ledgers DelayDaemonForLedgerN int // don't start daemon until ledger N reached by core @@ -142,8 +141,6 @@ type Test struct { ignoreLedgerCloseTimes bool datastoreConfigFunc func(*config.Config) - - backfillTimeout time.Duration } func NewTest(t testing.TB, cfg *TestConfig) *Test { @@ -166,7 +163,6 @@ func NewTest(t testing.TB, cfg *TestConfig) *Test { i.captiveCoreStoragePath = cfg.CaptiveCoreStoragePath parallel = !cfg.NoParallel i.datastoreConfigFunc = cfg.DatastoreConfigFunc - i.backfillTimeout = cfg.BackfillTimeout i.ignoreLedgerCloseTimes = cfg.IgnoreLedgerCloseTimes if cfg.OnlyRPC != nil { @@ -377,7 +373,6 @@ func (i *Test) getRPConfigForDaemon() rpcConfig { archiveURL: "http://" + i.testPorts.CoreArchiveHostPort, sqlitePath: i.sqlitePath, captiveCoreHTTPQueryPort: i.testPorts.captiveCoreHTTPQueryPort, - backfillTimeout: i.backfillTimeout, ignoreLedgerCloseTimes: i.ignoreLedgerCloseTimes, } } @@ -393,7 +388,6 @@ type rpcConfig struct { captiveCoreHTTPPort uint16 archiveURL string sqlitePath string - backfillTimeout time.Duration ignoreLedgerCloseTimes bool } @@ -407,7 +401,6 @@ func (vars rpcConfig) toMap() map[string]string { "ENDPOINT": vars.endPoint, "ADMIN_ENDPOINT": vars.adminEndpoint, "STELLAR_CORE_URL": vars.stellarCoreURL, - "BACKFILL_TIMEOUT": vars.backfillTimeout.String(), "CORE_REQUEST_TIMEOUT": "2s", "STELLAR_CORE_BINARY_PATH": vars.coreBinaryPath, "CAPTIVE_CORE_CONFIG_PATH": vars.captiveCoreConfigPath, From c8488c49ccb369bde359ab898fbe87f97a5cb9bf Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 27 Jan 2026 16:14:24 -0500 Subject: [PATCH 66/72] changed function name + linter error --- cmd/stellar-rpc/internal/ingest/backfill.go | 10 +++++----- .../internal/integrationtest/backfill_test.go | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 326b24bb..592747b8 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -152,7 +152,7 @@ func (b *BackfillMeta) setBounds( checkpointFrequency uint32, ) (backfillBounds, error) { // Determine bounds for ledgers to be written to local DB in backfill and frontfill phases - if err := b.dsInfo.getLatestSeqInCDP(ctx); err != nil { + if err := b.dsInfo.refreshLatestSeqInDatastore(ctx); err != nil { return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") } currentTipLedger := b.dsInfo.sequences.Last @@ -223,7 +223,7 @@ func (b *BackfillMeta) runFrontfill(ctx context.Context, bounds backfillBounds) numIterations = 2 } for range numIterations { - if err := b.dsInfo.getLatestSeqInCDP(ctx); err != nil { + if err := b.dsInfo.refreshLatestSeqInDatastore(ctx); err != nil { return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") } bounds.frontfill.Last = b.dsInfo.sequences.Last @@ -331,7 +331,7 @@ func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds rChunkBound = lChunkBound - 1 // Refresh lBound periodically to account for ledgers coming into the datastore if i > 0 && i%10 == 0 { - if err := b.dsInfo.getLatestSeqInCDP(ctx); err != nil { + if err := b.dsInfo.refreshLatestSeqInDatastore(ctx); err != nil { return backfillBounds{}, err } lBound = max(b.dsInfo.sequences.Last-bounds.nBackfill+1, b.dsInfo.sequences.First) @@ -394,8 +394,8 @@ func 
makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { ) } -// Gets the latest ledger number stored in the cloud Datastore/datalake and updates datastoreInfo.sequences.Last -func (d *datastoreInfo) getLatestSeqInCDP(callerCtx context.Context) error { +// Gets the latest ledger number stored in the cloud datastore and updates datastoreInfo.sequences.Last +func (d *datastoreInfo) refreshLatestSeqInDatastore(callerCtx context.Context) error { ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) defer cancelRunBackfill() diff --git a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go index 84c123d0..48974ab6 100644 --- a/cmd/stellar-rpc/internal/integrationtest/backfill_test.go +++ b/cmd/stellar-rpc/internal/integrationtest/backfill_test.go @@ -64,7 +64,7 @@ func testBackfillWithSeededDbLedgers(t *testing.T, localDbStart, localDbEnd uint DatastoreConfigFunc: makeDatastoreConfig, NoParallel: true, // can't use parallel due to env vars DelayDaemonForLedgerN: int(datastoreEnd), // stops daemon start until core has at least the datastore ledgers - IgnoreLedgerCloseTimes: true, // artificially seeded ledgers don't need correct close times relative to core's + IgnoreLedgerCloseTimes: true, // fake/seeded ledgers don't need correct close times relative to core's }) testDb := test.GetDaemon().GetDB() From 7b1544308ce689f94922315be9e658380cd410bb Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 27 Jan 2026 18:55:06 -0500 Subject: [PATCH 67/72] reduced state, improved warnings/errors --- cmd/stellar-rpc/internal/db/ledger.go | 1 - cmd/stellar-rpc/internal/ingest/backfill.go | 94 ++++++++------------- cmd/stellar-rpc/internal/ingest/service.go | 4 +- 3 files changed, 37 insertions(+), 62 deletions(-) diff --git a/cmd/stellar-rpc/internal/db/ledger.go b/cmd/stellar-rpc/internal/db/ledger.go index be909920..b5ca3a3b 100644 --- a/cmd/stellar-rpc/internal/db/ledger.go +++ b/cmd/stellar-rpc/internal/db/ledger.go @@ -279,7 +279,6 @@ func getLedgerRangeWithoutCache(ctx context.Context, db readDB) (ledgerbucketwin } // Queries a local DB, and in the inclusive range [start, end], returns the count of ledgers, and min/max sequence nums -// Assumes all sequence numbers in the DB are unique func getLedgerCountInRange(ctx context.Context, db readDB, start, end uint32) (uint32, uint32, uint32, error) { sql := sq.Select("COUNT(*) as count", "MIN(sequence) as min_seq", "MAX(sequence) as max_seq"). From(ledgerCloseMetaTableName). 
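The context at the end of this hunk is the aggregate behind getLedgerCountInRange: a single SELECT returning COUNT, MIN, and MAX of the sequence column over an inclusive range, which the gap precheck and the post-backfill verification both consume. Below is a self-contained sketch of building and interpreting that query with Masterminds/squirrel; the ledger_close_meta table name and the stand-in result values are assumptions for illustration.

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	start, end := uint32(1000), uint32(2000)

	// Same shape as the query in getLedgerCountInRange; the table name is assumed.
	query := sq.Select(
		"COUNT(*) AS count",
		"MIN(sequence) AS min_seq",
		"MAX(sequence) AS max_seq",
	).
		From("ledger_close_meta").
		Where(sq.And{
			sq.GtOrEq{"sequence": start},
			sq.LtOrEq{"sequence": end},
		})

	sqlStr, args, err := query.ToSql()
	if err != nil {
		panic(err)
	}
	fmt.Println(sqlStr, args)

	// Interpreting the aggregates: the range is gapless only if every sequence in it is present.
	count, minSeq, maxSeq := uint32(1001), uint32(1000), uint32(2000) // stand-in query results
	gapless := count == maxSeq-minSeq+1 && minSeq == start && maxSeq == end
	fmt.Println("gapless:", gapless)
}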
diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 592747b8..73f5ac33 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -103,12 +103,10 @@ func NewBackfillMeta( // It guarantees the backfill of the most recent cfg.HistoryRetentionWindow ledgers // Requires that no sequence number gaps exist in the local DB prior to backfilling func (b *BackfillMeta) RunBackfill(cfg *config.Config) error { - timeout := max(cfg.HistoryRetentionWindow/config.OneDayOfLedgers, 1) - ctx, cancelBackfill := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Hour) - defer cancelBackfill() + ctx := context.Background() // Ensure no pre-existing gaps in local DB - if err := b.runCheckNoGaps(ctx, cfg.IngestionTimeout); err != nil { + if _, _, err := b.verifyDbGapless(ctx, cfg.IngestionTimeout); err != nil { return err } bounds, err := b.setBounds(ctx, cfg.HistoryRetentionWindow, cfg.CheckpointFrequency) @@ -116,32 +114,21 @@ func (b *BackfillMeta) RunBackfill(cfg *config.Config) error { return errors.Wrap(err, "could not set backfill bounds") } - // Fill backwards from local DB tail to the left edge of retention window if necessary + // Fill backwards from local DB tail to the left edge of retention window if the DB already has ledgers if bounds, err = b.runBackfill(ctx, bounds); err != nil { return err } - // Fill forward from local DB head (or left edge of retention window, if empty) to current tip of datastore if bounds, err = b.runFrontfill(ctx, bounds); err != nil { return err } - // Ensure no gaps introduced and retention window requirements met - return b.runPostcheck(ctx, cfg.IngestionTimeout, bounds.nBackfill) -} - -// Ensures local DB is gapless prior to backfilling -func (b *BackfillMeta) runCheckNoGaps(ctx context.Context, timeout time.Duration) error { - b.logger.Infof("Starting initialization/precheck for backfilling the local database") - if !b.dbInfo.isNewDb { - if _, _, err := b.verifyDbGapless(ctx, timeout); err != nil { - return errors.Wrap(err, "backfill precheck failed") - } - } else { - b.logger.Infof("Local DB is empty, skipping precheck") + // Ensure no gaps introduced and retention window requirements are (at least approximately) met + minSeq, maxSeq, err := b.verifyDbGapless(ctx, cfg.IngestionTimeout) + if err != nil { + return err } - b.logger.Infof("Precheck and initialization passed, no gaps detected in local DB") - return nil + return b.verifyBounds(bounds.nBackfill, minSeq, maxSeq) } // Sets the bounds for backfill and frontfill phases, determines number of ledgers to backfill, and @@ -152,10 +139,10 @@ func (b *BackfillMeta) setBounds( checkpointFrequency uint32, ) (backfillBounds, error) { // Determine bounds for ledgers to be written to local DB in backfill and frontfill phases - if err := b.dsInfo.refreshLatestSeqInDatastore(ctx); err != nil { + currentTipLedger, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds) + if err != nil { return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - currentTipLedger := b.dsInfo.sequences.Last fillBounds := backfillBounds{ nBackfill: min(retentionWindow, currentTipLedger), checkpointAligner: checkpoint.NewCheckpointManager(checkpointFrequency), @@ -190,7 +177,7 @@ func (b *BackfillMeta) setBounds( fillBounds.backfill.Last = minDbSeq - 1 fillBounds.frontfill.First = maxDbSeq + 1 fillCount = fillBounds.nBackfill - (maxDbSeq - minDbSeq + 1) - // set frontfill 
last to current datastore tip later during frontfill phase + // frontfill last changes dynamically based on current tip ledger } b.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", currentTipLedger, fillCount) @@ -223,14 +210,14 @@ func (b *BackfillMeta) runFrontfill(ctx context.Context, bounds backfillBounds) numIterations = 2 } for range numIterations { - if err := b.dsInfo.refreshLatestSeqInDatastore(ctx); err != nil { + currentTipLedger, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds) + if err != nil { return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - bounds.frontfill.Last = b.dsInfo.sequences.Last - bounds.frontfill.Last = bounds.checkpointAligner.PrevCheckpoint(bounds.frontfill.Last) - if bounds.frontfill.First < bounds.frontfill.Last { + currentTipLedger = bounds.checkpointAligner.PrevCheckpoint(currentTipLedger) + if bounds.frontfill.First < currentTipLedger { b.logger.Infof("Frontfilling to the current datastore tip, ledgers [%d -> %d]", - bounds.frontfill.First, bounds.frontfill.Last) + bounds.frontfill.First, currentTipLedger) if err := b.frontfillChunks(ctx, bounds); err != nil { return backfillBounds{}, errors.Wrap(err, "frontfill failed") } @@ -238,21 +225,16 @@ func (b *BackfillMeta) runFrontfill(ctx context.Context, bounds backfillBounds) b.logger.Infof("No frontfill needed, local DB head already at datastore tip") } // Update frontfill.First for next iteration (if any) - bounds.frontfill.First = bounds.frontfill.Last + 1 + bounds.frontfill.First = currentTipLedger + 1 } - b.dbInfo.sequences.Last = max(bounds.frontfill.Last, b.dbInfo.sequences.Last) + b.dbInfo.sequences.Last = max(bounds.frontfill.First-1, b.dbInfo.sequences.Last) b.logger.Infof("Forward backfill of recent ledgers complete") return bounds, nil } -// Verifies backfilled ledgers are gapless and meet retention window requirements -func (b *BackfillMeta) runPostcheck(ctx context.Context, timeout time.Duration, nBackfill uint32) error { - b.logger.Infof("Starting post-backfill verification") - minSeq, maxSeq, err := b.verifyDbGapless(ctx, timeout) +// Verifies backfilled ledgers meet retention window requirements and warns if not +func (b *BackfillMeta) verifyBounds(nBackfill, minSeq, maxSeq uint32) error { count := maxSeq - minSeq + 1 - if err != nil { - return errors.Wrap(err, "post-backfill verification failed") - } if count+ledgerThreshold < nBackfill { b.logger.Warnf("post-backfill verification warning: expected at least %d ledgers, "+ "got %d ledgers (exceeds acceptable threshold of %d missing ledgers)", nBackfill, count, ledgerThreshold) @@ -268,7 +250,9 @@ func (b *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time.Duratio defer cancelCheckNoGaps() ledgerRange, err := b.dbInfo.reader.GetLedgerRange(ctx) - if err != nil { + if errors.Is(err, db.ErrEmptyDB) { + return 0, 0, nil // empty DB is considered gapless + } else if err != nil { return 0, 0, errors.Wrap(err, "db verify: could not get ledger range") } // Get sequence number of highest/lowest ledgers in local DB @@ -291,7 +275,7 @@ func (b *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time.Duratio // Used to fill local DB backwards towards older ledgers (starting from newest) func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { lBound, rBound := bounds.backfill.First, bounds.backfill.Last - for i, rChunkBound := 0, rBound; rChunkBound >= lBound; i++ { + for i, 
rChunkBound := 0, rBound; rChunkBound >= lBound; i++ { // note: lBound changes in the loop body if err := ctx.Err(); err != nil { return backfillBounds{}, err } @@ -303,7 +287,7 @@ func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds } defer func() { if err := tempBackend.Close(); err != nil { - b.logger.Warnf("error closing temporary backend: %v", err) + b.logger.Errorf("error closing temporary backend: %v", err) } }() @@ -325,16 +309,17 @@ func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds b.logger.Infof("Backfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rBound-lChunkBound)/max(rBound-lBound, 1)) - if lChunkBound == lBound { + if lChunkBound <= lBound { break } rChunkBound = lChunkBound - 1 // Refresh lBound periodically to account for ledgers coming into the datastore if i > 0 && i%10 == 0 { - if err := b.dsInfo.refreshLatestSeqInDatastore(ctx); err != nil { + if currentTipLedger, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds); err != nil { return backfillBounds{}, err + } else { + lBound = max(currentTipLedger-bounds.nBackfill+1, b.dsInfo.sequences.First) } - lBound = max(b.dsInfo.sequences.Last-bounds.nBackfill+1, b.dsInfo.sequences.First) } } bounds.backfill.First = lBound @@ -344,7 +329,11 @@ func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB forwards towards the current ledger tip func (b *BackfillMeta) frontfillChunks(ctx context.Context, bounds backfillBounds) error { - lBound, rBound := bounds.frontfill.First, bounds.frontfill.Last + rBound, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds) + if err != nil { + return errors.Wrap(err, "could not get latest ledger number from cloud datastore") + } + lBound := bounds.frontfill.First // Backend for frontfill can be persistent over multiple chunks backend, err := makeBackend(b.dsInfo) if err != nil { @@ -352,7 +341,7 @@ func (b *BackfillMeta) frontfillChunks(ctx context.Context, bounds backfillBound } defer func() { if err := backend.Close(); err != nil { - b.logger.Warnf("error closing ledger backend: %v", err) + b.logger.Errorf("error closing ledger backend: %v", err) } }() @@ -393,16 +382,3 @@ func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { dsInfo.schema, ) } - -// Gets the latest ledger number stored in the cloud datastore and updates datastoreInfo.sequences.Last -func (d *datastoreInfo) refreshLatestSeqInDatastore(callerCtx context.Context) error { - ctx, cancelRunBackfill := context.WithTimeout(callerCtx, 5*time.Second) - defer cancelRunBackfill() - - var err error - d.sequences.Last, err = datastore.FindLatestLedgerSequence(ctx, d.ds) - if err != nil { - return errors.Wrap(err, "could not get latest ledger sequence from datastore") - } - return nil -} diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index 4072c2de..c351bf1c 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -199,7 +199,7 @@ func (s *Service) ingest(ctx context.Context, sequence uint32) error { } defer func() { if err := tx.Rollback(); err != nil { - s.logger.WithError(err).Warn("could not rollback ingest write transactions") + s.logger.WithError(err).Fatal("could not rollback ingest write transactions") } }() @@ -253,7 +253,7 @@ func (s *Service) ingestRange(ctx 
context.Context, backend backends.LedgerBacken defer func() { if err := tx.Rollback(); err != nil { - s.logger.WithError(err).Warn("could not rollback ingest write transactions") + s.logger.WithError(err).Fatal("could not rollback ingest write transactions") } }() From bc4bf00c94e7f68cbc026e035ecf010579297644 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 27 Jan 2026 19:17:53 -0500 Subject: [PATCH 68/72] changed backend close failure logging to use WithError --- cmd/stellar-rpc/internal/ingest/backfill.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 73f5ac33..901e69b5 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -287,7 +287,7 @@ func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds } defer func() { if err := tempBackend.Close(); err != nil { - b.logger.Errorf("error closing temporary backend: %v", err) + b.logger.WithError(err).Error("error closing temporary backend") } }() @@ -341,7 +341,7 @@ func (b *BackfillMeta) frontfillChunks(ctx context.Context, bounds backfillBound } defer func() { if err := backend.Close(); err != nil { - b.logger.Errorf("error closing ledger backend: %v", err) + b.logger.WithError(err).Error("error closing ledger backend") } }() From 348278d14c5b399a5b53aaeb95a102223d5191c8 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Tue, 27 Jan 2026 19:20:21 -0500 Subject: [PATCH 69/72] linter --- cmd/stellar-rpc/internal/ingest/backfill.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 901e69b5..2c06f6ab 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -315,11 +315,11 @@ func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds rChunkBound = lChunkBound - 1 // Refresh lBound periodically to account for ledgers coming into the datastore if i > 0 && i%10 == 0 { - if currentTipLedger, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds); err != nil { + currentTipLedger, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds) + if err != nil { return backfillBounds{}, err - } else { - lBound = max(currentTipLedger-bounds.nBackfill+1, b.dsInfo.sequences.First) } + lBound = max(currentTipLedger-bounds.nBackfill+1, b.dsInfo.sequences.First) } } bounds.backfill.First = lBound From 721c780ab7b27a2ee479464923b151fd798a7165 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 28 Jan 2026 13:34:20 -0500 Subject: [PATCH 70/72] simplified branching logic and resolved nits --- cmd/stellar-rpc/internal/ingest/backfill.go | 92 ++++++++++----------- cmd/stellar-rpc/internal/ingest/service.go | 7 +- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 2c06f6ab..6da73c39 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -45,7 +45,7 @@ type databaseInfo struct { isNewDb bool } -type backfillBounds struct { +type fillBounds struct { backfill db.LedgerSeqRange frontfill db.LedgerSeqRange nBackfill uint32 @@ -137,92 +137,87 @@ func (b *BackfillMeta) setBounds( ctx context.Context, retentionWindow uint32, checkpointFrequency uint32, -) (backfillBounds, error) { - // Determine bounds for 
ledgers to be written to local DB in backfill and frontfill phases +) (fillBounds, error) { currentTipLedger, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds) if err != nil { - return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") + return fillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") } - fillBounds := backfillBounds{ + bounds := fillBounds{ nBackfill: min(retentionWindow, currentTipLedger), checkpointAligner: checkpoint.NewCheckpointManager(checkpointFrequency), } - // Determine starting ledger to fill from + // Determine the oldest/starting ledger to fill from var fillStartMin uint32 // minimum possible ledger to start from - if currentTipLedger >= fillBounds.nBackfill+1 { - fillStartMin = max(currentTipLedger-fillBounds.nBackfill+1, b.dsInfo.sequences.First) + if currentTipLedger >= bounds.nBackfill+1 { + fillStartMin = max(currentTipLedger-bounds.nBackfill+1, b.dsInfo.sequences.First) } else { fillStartMin = b.dsInfo.sequences.First } + minDbSeq, maxDbSeq := b.dbInfo.sequences.First, b.dbInfo.sequences.Last var fillCount uint32 // if initial DB empty or tail covers edge of filling window, skip backwards backfill - if b.dbInfo.isNewDb || minDbSeq <= fillStartMin { - if b.dbInfo.isNewDb { - fillBounds.frontfill.First = fillStartMin - fillCount = currentTipLedger - fillStartMin + 1 - } else { - // DB tail already covers left edge of retention window - fillBounds.frontfill.First = maxDbSeq + 1 - fillCount = currentTipLedger - maxDbSeq - } - fillBounds.backfill.First = 1 // indicates backfill phase is skipped + if b.dbInfo.isNewDb { + bounds.frontfill.First = fillStartMin + fillCount = currentTipLedger - fillStartMin + 1 + } else if minDbSeq <= fillStartMin { + // DB tail already covers left edge of retention window + bounds.frontfill.First = maxDbSeq + 1 + fillCount = currentTipLedger - maxDbSeq } else { if currentTipLedger < b.dbInfo.sequences.First { // this would introduce a gap missing ledgers of sequences between the current tip and local DB minimum - return backfillBounds{}, errors.New("current datastore tip is older than local DB minimum ledger") + return fillBounds{}, errors.New("current datastore tip is older than local DB minimum ledger") } - fillBounds.backfill.First = fillStartMin - fillBounds.backfill.Last = minDbSeq - 1 - fillBounds.frontfill.First = maxDbSeq + 1 - fillCount = fillBounds.nBackfill - (maxDbSeq - minDbSeq + 1) + bounds.backfill.First = fillStartMin + bounds.backfill.Last = minDbSeq - 1 + bounds.frontfill.First = maxDbSeq + 1 + fillCount = bounds.nBackfill - (maxDbSeq - minDbSeq + 1) // frontfill last changes dynamically based on current tip ledger } b.logger.Infof("Current tip ledger in cloud datastore is %d, going to backfill %d ledgers", currentTipLedger, fillCount) - return fillBounds, nil + return bounds, nil } // Backfills the local DB with older ledgers from newest to oldest within the retention window -func (b *BackfillMeta) runBackfill(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { +func (b *BackfillMeta) runBackfill(ctx context.Context, bounds fillBounds) (fillBounds, error) { var err error - if bounds.backfill.First <= bounds.backfill.Last { + if bounds.needBackfillPhase() { b.logger.Infof("Backfilling to the left edge of retention window, ledgers [%d <- %d]", bounds.backfill.First, bounds.backfill.Last) if bounds, err = b.backfillChunks(ctx, bounds); err != nil { - return backfillBounds{}, errors.Wrap(err, "backfill failed") + 
return fillBounds{}, errors.Wrap(err, "backfill failed") } b.dbInfo.sequences.First = bounds.backfill.First b.logger.Infof("Backfill of old ledgers complete") - } else { - b.logger.Infof("No backfill needed, local DB tail already at retention window edge") } return bounds, nil } // Backfills the local DB with older ledgers from oldest to newest within the retention window -func (b *BackfillMeta) runFrontfill(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { +func (b *BackfillMeta) runFrontfill(ctx context.Context, bounds fillBounds) (fillBounds, error) { numIterations := 1 // If we skipped backfilling, we want to fill forwards twice because the latest ledger may be // significantly further in the future after the first fill completes and fills are faster than catch-up. - if bounds.backfill.First > bounds.backfill.Last { + if !bounds.needBackfillPhase() { numIterations = 2 } for range numIterations { currentTipLedger, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds) if err != nil { - return backfillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") + return fillBounds{}, errors.Wrap(err, "could not get latest ledger number from cloud datastore") } currentTipLedger = bounds.checkpointAligner.PrevCheckpoint(currentTipLedger) if bounds.frontfill.First < currentTipLedger { b.logger.Infof("Frontfilling to the current datastore tip, ledgers [%d -> %d]", bounds.frontfill.First, currentTipLedger) if err := b.frontfillChunks(ctx, bounds); err != nil { - return backfillBounds{}, errors.Wrap(err, "frontfill failed") + return fillBounds{}, errors.Wrap(err, "frontfill failed") } } else { - b.logger.Infof("No frontfill needed, local DB head already at datastore tip") + b.logger.Infof("No extra filling needed, local DB head already at datastore tip") } // Update frontfill.First for next iteration (if any) bounds.frontfill.First = currentTipLedger + 1 @@ -257,8 +252,7 @@ func (b *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time.Duratio } // Get sequence number of highest/lowest ledgers in local DB minDbSeq, maxDbSeq := ledgerRange.FirstLedger.Sequence, ledgerRange.LastLedger.Sequence - b.logger.Debugf("DB verify: checking for gaps in [%d, %d]", - minDbSeq, maxDbSeq) + b.logger.Debugf("DB verify: checking for gaps in [%d, %d]", minDbSeq, maxDbSeq) expectedCount := maxDbSeq - minDbSeq + 1 count, sequencesMin, sequencesMax, err := b.dbInfo.reader.GetLedgerCountInRange(ctx, minDbSeq, maxDbSeq) if err != nil { @@ -273,17 +267,17 @@ func (b *BackfillMeta) verifyDbGapless(ctx context.Context, timeout time.Duratio // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB backwards towards older ledgers (starting from newest) -func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds) (backfillBounds, error) { +func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds fillBounds) (fillBounds, error) { lBound, rBound := bounds.backfill.First, bounds.backfill.Last for i, rChunkBound := 0, rBound; rChunkBound >= lBound; i++ { // note: lBound changes in the loop body if err := ctx.Err(); err != nil { - return backfillBounds{}, err + return fillBounds{}, err } // Create temporary backend for backward-filling chunks // Note monotonicity constraint of the ledger backend tempBackend, err := makeBackend(b.dsInfo) if err != nil { - return backfillBounds{}, errors.Wrap(err, "couldn't create backend") + return fillBounds{}, errors.Wrap(err, "couldn't create backend") 
} defer func() { if err := tempBackend.Close(); err != nil { @@ -291,20 +285,19 @@ func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds } }() - var lChunkBound uint32 - // Underflow check for chunk bounds + lChunkBound := lBound + // Underflow-safe check for setting left chunk bound if rChunkBound >= lBound+ChunkSize-1 { lChunkBound = max(lBound, rChunkBound-ChunkSize+1) - } else { - lChunkBound = lBound } + b.logger.Infof("Backfill: filling ledgers [%d, %d]", lChunkBound, rChunkBound) chunkRange := ledgerbackend.BoundedRange(lChunkBound, rChunkBound) if err := tempBackend.PrepareRange(ctx, chunkRange); err != nil { - return backfillBounds{}, err + return fillBounds{}, err } if err := b.ingestService.ingestRange(ctx, tempBackend, chunkRange); err != nil { - return backfillBounds{}, errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) + return fillBounds{}, errors.Wrapf(err, "couldn't fill chunk [%d, %d]", lChunkBound, rChunkBound) } b.logger.Infof("Backfill: committed ledgers [%d, %d]; %d%% done", lChunkBound, rChunkBound, 100*(rBound-lChunkBound)/max(rBound-lBound, 1)) @@ -317,7 +310,7 @@ func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds if i > 0 && i%10 == 0 { currentTipLedger, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds) if err != nil { - return backfillBounds{}, err + return fillBounds{}, err } lBound = max(currentTipLedger-bounds.nBackfill+1, b.dsInfo.sequences.First) } @@ -328,7 +321,7 @@ func (b *BackfillMeta) backfillChunks(ctx context.Context, bounds backfillBounds // Backfills the local DB with ledgers in [lBound, rBound] from the cloud datastore // Used to fill local DB forwards towards the current ledger tip -func (b *BackfillMeta) frontfillChunks(ctx context.Context, bounds backfillBounds) error { +func (b *BackfillMeta) frontfillChunks(ctx context.Context, bounds fillBounds) error { rBound, err := datastore.FindLatestLedgerSequence(ctx, b.dsInfo.ds) if err != nil { return errors.Wrap(err, "could not get latest ledger number from cloud datastore") @@ -382,3 +375,8 @@ func makeBackend(dsInfo datastoreInfo) (ledgerbackend.LedgerBackend, error) { dsInfo.schema, ) } + +// Determines if backfill phase should be skipped +func (bounds fillBounds) needBackfillPhase() bool { + return !bounds.backfill.Empty() +} diff --git a/cmd/stellar-rpc/internal/ingest/service.go b/cmd/stellar-rpc/internal/ingest/service.go index c351bf1c..afdb9787 100644 --- a/cmd/stellar-rpc/internal/ingest/service.go +++ b/cmd/stellar-rpc/internal/ingest/service.go @@ -199,7 +199,7 @@ func (s *Service) ingest(ctx context.Context, sequence uint32) error { } defer func() { if err := tx.Rollback(); err != nil { - s.logger.WithError(err).Fatal("could not rollback ingest write transactions") + s.logger.WithError(err).Warn("could not rollback ingest write transactions") } }() @@ -243,8 +243,6 @@ func (s *Service) ingest(ctx context.Context, sequence uint32) error { // Ingests a range of ledgers from a provided ledgerBackend func (s *Service) ingestRange(ctx context.Context, backend backends.LedgerBackend, seqRange backends.Range) error { s.logger.Debugf("Ingesting ledgers [%d, %d]", seqRange.From(), seqRange.To()) - var ledgerCloseMeta xdr.LedgerCloseMeta - startTime := time.Now() tx, err := s.db.NewTx(ctx) if err != nil { @@ -253,10 +251,11 @@ func (s *Service) ingestRange(ctx context.Context, backend backends.LedgerBacken defer func() { if err := tx.Rollback(); err != nil { - s.logger.WithError(err).Fatal("could not 
rollback ingest write transactions") + s.logger.WithError(err).Warn("could not rollback ingest write transactions") } }() + var ledgerCloseMeta xdr.LedgerCloseMeta for seq := seqRange.From(); seq <= seqRange.To(); seq++ { ledgerCloseMeta, err = backend.GetLedger(ctx, seq) if err != nil { From da371996e48348dc9e76d88f9be32b3d8260c90c Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 28 Jan 2026 13:46:55 -0500 Subject: [PATCH 71/72] changed if-else if-else to switch --- cmd/stellar-rpc/internal/ingest/backfill.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index 6da73c39..fb15b5d7 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -158,14 +158,15 @@ func (b *BackfillMeta) setBounds( minDbSeq, maxDbSeq := b.dbInfo.sequences.First, b.dbInfo.sequences.Last var fillCount uint32 // if initial DB empty or tail covers edge of filling window, skip backwards backfill - if b.dbInfo.isNewDb { + switch { + case b.dbInfo.isNewDb: bounds.frontfill.First = fillStartMin fillCount = currentTipLedger - fillStartMin + 1 - } else if minDbSeq <= fillStartMin { + case minDbSeq <= fillStartMin: // DB tail already covers left edge of retention window bounds.frontfill.First = maxDbSeq + 1 fillCount = currentTipLedger - maxDbSeq - } else { + default: if currentTipLedger < b.dbInfo.sequences.First { // this would introduce a gap missing ledgers of sequences between the current tip and local DB minimum return fillBounds{}, errors.New("current datastore tip is older than local DB minimum ledger") From 2a66cdc158c09461946e86c3ce11ebe3ae056714 Mon Sep 17 00:00:00 2001 From: Christian Jonas Date: Wed, 28 Jan 2026 14:04:28 -0500 Subject: [PATCH 72/72] moved backfill logging completion line outside of conditional --- cmd/stellar-rpc/internal/ingest/backfill.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/stellar-rpc/internal/ingest/backfill.go b/cmd/stellar-rpc/internal/ingest/backfill.go index fb15b5d7..0c434794 100644 --- a/cmd/stellar-rpc/internal/ingest/backfill.go +++ b/cmd/stellar-rpc/internal/ingest/backfill.go @@ -192,8 +192,8 @@ func (b *BackfillMeta) runBackfill(ctx context.Context, bounds fillBounds) (fill return fillBounds{}, errors.Wrap(err, "backfill failed") } b.dbInfo.sequences.First = bounds.backfill.First - b.logger.Infof("Backfill of old ledgers complete") } + b.logger.Infof("Backfill of old ledgers complete") return bounds, nil }