-
Notifications
You must be signed in to change notification settings - Fork 29.1k
[SPARK-55751][SS] Add metrics on state store loads from cloud storage #54567
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -244,6 +244,9 @@ class RocksDB( | |
| // Was snapshot auto repair performed when loading the current version | ||
| @volatile private var performedSnapshotAutoRepair = false | ||
|
|
||
| // Number of DFS (cloud) fetches performed when loading the current version | ||
| @volatile private var numCloudLoads = 0L | ||
|
|
||
| @volatile private var fileManagerMetrics = RocksDBFileManagerMetrics.EMPTY_METRICS | ||
|
|
||
| // SPARK-46249 - Keep track of recorded metrics per version which can be used for querying later | ||
|
|
@@ -515,10 +518,7 @@ class RocksDB( | |
| MDC(LogKeys.SNAPSHOT_VERSION, latestSnapshotVersion)}, latestSnapshotUniqueId: ${ | ||
| MDC(LogKeys.UUID, latestSnapshotUniqueId)}") | ||
|
|
||
| val metadata = fileManager.loadCheckpointFromDfs(latestSnapshotVersion, | ||
| workingDir, rocksDBFileMapping, latestSnapshotUniqueId) | ||
|
|
||
| loadedVersion = latestSnapshotVersion | ||
| val metadata = fetchCheckpointFromDfs(latestSnapshotVersion, latestSnapshotUniqueId) | ||
|
|
||
| // reset the last snapshot version to the latest available snapshot version | ||
| lastSnapshotVersion = latestSnapshotVersion | ||
|
|
@@ -601,12 +601,30 @@ class RocksDB( | |
|
|
||
| private def loadEmptyStore(version: Long): Unit = { | ||
| // Use version 0 logic to create empty directory with no SST files | ||
| val metadata = fileManager.loadCheckpointFromDfs(0, workingDir, rocksDBFileMapping, None) | ||
| val metadata = fetchCheckpointFromDfs(0) | ||
| // No real snapshot exists at this version; advance loadedVersion to the target | ||
| // so the next commit produces version + 1 rather than 1. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please lets remove this comment. It isn't correct |
||
| loadedVersion = version | ||
| fileManager.setMaxSeenVersion(version) | ||
| openLocalRocksDB(metadata) | ||
| } | ||
|
|
||
| /** | ||
| * Fetches a snapshot from DFS, sets [[loadedVersion]] to the snapshot version, | ||
| * and increments [[numCloudLoads]]. Returns the checkpoint metadata. | ||
| * Callers are responsible for calling [[openLocalRocksDB]], setting | ||
| * [[lastSnapshotVersion]], and any load-path-specific [[loadedVersion]] overrides. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please lets remove these 2 lines of comment. It is not essential for this func |
||
| */ | ||
| private def fetchCheckpointFromDfs( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| snapshotVersion: Long, | ||
| uniqueId: Option[String] = None): RocksDBCheckpointMetadata = { | ||
| numCloudLoads += 1 | ||
| val metadata = fileManager.loadCheckpointFromDfs( | ||
| snapshotVersion, workingDir, rocksDBFileMapping, uniqueId) | ||
| loadedVersion = snapshotVersion | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Honestly I wouldn't add this extra function, makes the side effect even more difficult to reason about from the calling function. Just set the load flag that you are introducing at the source. It is a one-line change. |
||
| metadata | ||
| } | ||
|
|
||
| private def loadWithoutCheckpointId( | ||
| version: Long, | ||
| readOnly: Boolean = false, | ||
|
|
@@ -680,14 +698,12 @@ class RocksDB( | |
| override protected def beforeLoad(): Unit = closeDB(ignoreException = false) | ||
|
|
||
| override protected def loadSnapshotFromCheckpoint(snapshotVersion: Long): Unit = { | ||
| val remoteMetaData = fileManager.loadCheckpointFromDfs(snapshotVersion, | ||
| workingDir, rocksDBFileMapping) | ||
| val metadata = fetchCheckpointFromDfs(snapshotVersion) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is why your test is having > 1 count for when autorepair is done. Even if autorepair is done, we just care whether a store was loaded from DFS or not. |
||
|
|
||
| loadedVersion = snapshotVersion | ||
| // Initialize maxVersion upon successful load from DFS | ||
| fileManager.setMaxSeenVersion(snapshotVersion) | ||
|
|
||
| openLocalRocksDB(remoteMetaData) | ||
| openLocalRocksDB(metadata) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lets avoid these unnecessary variable renames. It just adds unnecessary extra lines to the PR |
||
|
|
||
| // By setting this to the snapshot version we successfully loaded, | ||
| // if auto snapshot repair is enabled, and we end up skipping the latest snapshot | ||
|
|
@@ -782,6 +798,7 @@ class RocksDB( | |
| assert(version >= 0) | ||
| recordedMetrics = None | ||
| performedSnapshotAutoRepair = false | ||
| numCloudLoads = 0L | ||
| // Reset the load metrics before loading | ||
| loadMetrics.clear() | ||
|
|
||
|
|
@@ -800,7 +817,8 @@ class RocksDB( | |
| // Record the metrics after loading | ||
| val duration = System.currentTimeMillis() - startTime | ||
| loadMetrics ++= Map( | ||
| "load" -> duration | ||
| "load" -> duration, | ||
| "numCloudLoads" -> numCloudLoads | ||
| ) | ||
| // Register with memory manager after successful load | ||
| updateMemoryUsageIfNeeded() | ||
|
|
@@ -831,6 +849,7 @@ class RocksDB( | |
| assert(snapshotVersionStateStoreCkptId.isDefined == endVersionStateStoreCkptId.isDefined) | ||
| assert(snapshotVersion >= 0 && endVersion >= snapshotVersion) | ||
| recordedMetrics = None | ||
| numCloudLoads = 0L | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this function would always load from DFS. Basically it is in the name, "load from a snapshot in the DFS" |
||
| loadMetrics.clear() | ||
|
|
||
| logInfo( | ||
|
|
@@ -856,7 +875,8 @@ class RocksDB( | |
|
|
||
| // Record the metrics after loading | ||
| loadMetrics ++= Map( | ||
| "loadFromSnapshot" -> (System.currentTimeMillis() - startTime) | ||
| "loadFromSnapshot" -> (System.currentTimeMillis() - startTime), | ||
| "numCloudLoads" -> numCloudLoads | ||
| ) | ||
|
|
||
| this | ||
|
|
@@ -880,9 +900,7 @@ class RocksDB( | |
| assert(snapshotVersionStateStoreCkptId.isDefined == endVersionStateStoreCkptId.isDefined) | ||
|
|
||
| closeDB() | ||
| val metadata = fileManager.loadCheckpointFromDfs(snapshotVersion, | ||
| workingDir, rocksDBFileMapping, snapshotVersionStateStoreCkptId) | ||
| loadedVersion = snapshotVersion | ||
| val metadata = fetchCheckpointFromDfs(snapshotVersion, snapshotVersionStateStoreCkptId) | ||
| lastSnapshotVersion = snapshotVersion | ||
|
|
||
| setInitialCFInfo() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -659,6 +659,7 @@ private[sql] class RocksDBStateStoreProvider | |
| CUSTOM_METRIC_LOAD_TIME -> loadMetrics("load"), | ||
| CUSTOM_METRIC_REPLAY_CHANGE_LOG -> loadMetrics("replayChangelog"), | ||
| CUSTOM_METRIC_NUM_REPLAY_CHANGE_LOG_FILES -> loadMetrics("numReplayChangeLogFiles"), | ||
| CUSTOM_METRIC_LOADED_FROM_CLOUD -> loadMetrics("numCloudLoads"), | ||
| CUSTOM_METRIC_BYTES_COPIED -> rocksDBMetrics.bytesCopied, | ||
| CUSTOM_METRIC_FILES_COPIED -> rocksDBMetrics.filesCopied, | ||
| CUSTOM_METRIC_FILES_REUSED -> rocksDBMetrics.filesReused, | ||
|
|
@@ -1367,6 +1368,10 @@ object RocksDBStateStoreProvider { | |
| "rocksdbNumReplayChangelogFiles", | ||
| "RocksDB: load - number of change log files replayed") | ||
|
|
||
| val CUSTOM_METRIC_LOADED_FROM_CLOUD = StateStoreCustomSumMetric( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
| "rocksdbLoadedFromCloud", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto, fix name |
||
| "RocksDB: load - number of times state was loaded from cloud storage") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto, lets avoid using Cloud. use: |
||
|
|
||
| val CUSTOM_METRIC_BLOCK_CACHE_MISS = StateStoreCustomSumMetric( | ||
| "rocksdbReadBlockCacheMissCount", | ||
| "RocksDB: read - count of cache misses that required reading from local disk") | ||
|
|
@@ -1436,7 +1441,7 @@ object RocksDBStateStoreProvider { | |
| CUSTOM_METRIC_NUM_EXTERNAL_COL_FAMILIES, CUSTOM_METRIC_NUM_INTERNAL_COL_FAMILIES, | ||
| CUSTOM_METRIC_LOAD_FROM_SNAPSHOT_TIME, CUSTOM_METRIC_LOAD_TIME, CUSTOM_METRIC_REPLAY_CHANGE_LOG, | ||
| CUSTOM_METRIC_NUM_REPLAY_CHANGE_LOG_FILES, CUSTOM_METRIC_NUM_SNAPSHOTS_AUTO_REPAIRED, | ||
| CUSTOM_METRIC_FORCE_SNAPSHOT) | ||
| CUSTOM_METRIC_FORCE_SNAPSHOT, CUSTOM_METRIC_LOADED_FROM_CLOUD) | ||
|
|
||
| val CUSTOM_INSTANCE_METRIC_SNAPSHOT_LAST_UPLOADED = StateStoreSnapshotLastUploadInstanceMetric() | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -114,7 +114,8 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest | |
| "SnapshotLastUploaded.partition_0_default", "rocksdbChangeLogWriterCommitLatencyMs", | ||
| "rocksdbSaveZipFilesLatencyMs", "rocksdbLoadFromSnapshotLatencyMs", | ||
| "rocksdbLoadLatencyMs", "rocksdbReplayChangeLogLatencyMs", | ||
| "rocksdbNumReplayChangelogFiles", "rocksdbForceSnapshotCount")) | ||
| "rocksdbNumReplayChangelogFiles", "rocksdbForceSnapshotCount", | ||
| "rocksdbLoadedFromCloud")) | ||
| assert(stateOperatorMetrics.customMetrics.get("rocksdbNumSnapshotsAutoRepaired") == 0, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. lets verify the metric value too |
||
| "Should be 0 since we didn't repair any snapshot") | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2920,6 +2920,7 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession | |
|
|
||
| val m1 = db.metricsOpt.get | ||
| assert(m1.loadMetrics("load") > 0) | ||
| assert(m1.loadMetrics("numCloudLoads") === 1) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i don't see any other test where you are also verifying load is 0 |
||
| // since we called load, loadFromSnapshot should not be populated | ||
| assert(!m1.loadMetrics.contains("loadFromSnapshot")) | ||
|
|
||
|
|
@@ -2956,6 +2957,7 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession | |
| db.refreshRecordedMetricsForTest() | ||
| val m1 = db.metricsOpt.get | ||
| assert(m1.loadMetrics("loadFromSnapshot") > 0) | ||
| assert(m1.loadMetrics("numCloudLoads") === 1) | ||
| // since we called loadFromSnapshot, load should not be populated | ||
| assert(!m1.loadMetrics.contains("load")) | ||
| assert(m1.loadMetrics("replayChangelog") > 0) | ||
|
|
@@ -4073,6 +4075,8 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession | |
| db.put("e", "4") | ||
| db.commit() // a new snapshot (5.zip) will be created since previous one is corrupt | ||
| assert(db.metricsOpt.get.numSnapshotsAutoRepaired == 1) | ||
| // 4.zip was tried and failed (1 load), then 2.zip succeeded (2 loads) | ||
| assert(db.metricsOpt.get.loadMetrics("numCloudLoads") === 2) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. incorrect |
||
| db.doMaintenance() // upload snapshot 5.zip | ||
| } | ||
|
|
||
|
|
@@ -4085,6 +4089,8 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession | |
| assert(toStr(db.get("b")) == "1") | ||
| db.commit() | ||
| assert(db.metricsOpt.get.numSnapshotsAutoRepaired == 1) | ||
| // 5.zip failed (1), 4.zip failed (2), 2.zip failed (3), then version 0 succeeded (4) | ||
| assert(db.metricsOpt.get.loadMetrics("numCloudLoads") === 4) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. incorrect |
||
| } | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should never be greater than 1 for a given store instance. Hence can be a boolean. For a store if loaded from DFS then should be 1, if not, 0. Just like the
performedSnapshotAutoRepairabove.Also lets avoid referring to this as cloud, you would see that existing code calls it DFS. Because spark can run using storage that isn't cloud.