From e2dbd29e69f97083daf9f11138b2ecdd28c87e6d Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Thu, 30 Oct 2025 23:40:03 +0000 Subject: [PATCH 01/35] Compactor support tenant bucket prefixing --- cmd/thanos/compact.go | 956 ++++++++++++++++++++----------------- cmd/thanos/compact_test.go | 284 +++++++++++ 2 files changed, 795 insertions(+), 445 deletions(-) create mode 100644 cmd/thanos/compact_test.go diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 1530c5cf5b0..bd7ed311fc8 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -26,6 +26,7 @@ import ( "github.com/prometheus/common/route" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb" + "gopkg.in/yaml.v2" "github.com/thanos-io/objstore" "github.com/thanos-io/objstore/client" @@ -165,6 +166,23 @@ func newCompactMetrics(reg *prometheus.Registry, deleteDelay time.Duration) *com return m } +type TenantBucketConfig struct { + TenantPrefixes []string `yaml:"tenant_prefixes"` + // Other config fields can be added here if needed +} + +func accessTenantPrefixes(bucketConf client.BucketConfig) ([]string, error) { + configBytes, err := yaml.Marshal(bucketConf.Config) + if err != nil { + return nil, errors.Wrap(err, "failed to marshal bucket config") + } + var tenantBucketConfig TenantBucketConfig + if err := yaml.Unmarshal(configBytes, &tenantBucketConfig); err != nil { + return nil, errors.Wrap(err, "failed to unmarshal into tenant bucket config") + } + return tenantBucketConfig.TenantPrefixes, nil +} + func runCompact( g *run.Group, logger log.Logger, @@ -207,552 +225,600 @@ func runCompact( return err } - bkt, err := client.NewBucket(logger, confContentYaml, component.String(), nil) - if conf.enableFolderDeletion { - bkt, err = block.WrapWithAzDataLakeSdk(logger, confContentYaml, bkt) - level.Info(logger).Log("msg", "azdatalake sdk wrapper enabled", "name", bkt.Name()) + // Determine tenant prefixes to use (if provided) + var bucketConf client.BucketConfig + if err := yaml.Unmarshal(confContentYaml, &bucketConf); err != nil { + return errors.Wrap(err, "parse bucket config") } - if err != nil { - return err - } - insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) - relabelContentYaml, err := conf.selectorRelabelConf.Content() - if err != nil { - return errors.Wrap(err, "get content of relabel configuration") + var tenantPrefixes []string + tenantPrefixes, err = accessTenantPrefixes(bucketConf) + if err != nil || len(tenantPrefixes) == 0 { + tenantPrefixes = []string{""} + level.Info(logger).Log("msg", "tenant prefixes not found in bucket config, assuming single-tenant mode") + } else { + level.Info(logger).Log("msg", "tenant prefixes found in bucket config, running in multi-tenant mode", "prefixes", strings.Join(tenantPrefixes, ",")) } - relabelConfig, err := block.ParseRelabelConfig(relabelContentYaml, block.SelectorSupportedRelabelActions) - if err != nil { - return err - } + // Start compaction for each tenant + // Each will get its own bucket created via client.NewBucket with the appropriate prefix + for _, tenantPrefix := range tenantPrefixes { + + if tenantPrefix != "" { + bucketConf.Prefix = "v1/raw/" + tenantPrefix + } - // Ensure we close up everything properly. 
- defer func() { + tenantConfYaml, err := yaml.Marshal(bucketConf) if err != nil { - runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") + return errors.Wrap(err, "marshal tenant bucket config") } - }() - - // While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter. - // The delay of deleteDelay/2 is added to ensure we fetch blocks that are meant to be deleted but do not have a replacement yet. - // This is to make sure compactor will not accidentally perform compactions with gap instead. - ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(logger, insBkt, deleteDelay/2, conf.blockMetaFetchConcurrency) - duplicateBlocksFilter := block.NewDeduplicateFilter(conf.blockMetaFetchConcurrency) - noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) - noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) - labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig) - consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, extprom.WrapRegistererWithPrefix("thanos_", reg)) - timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) - - var blockLister block.Lister - switch syncStrategy(conf.blockListStrategy) { - case concurrentDiscovery: - blockLister = block.NewConcurrentLister(logger, insBkt) - case recursiveDiscovery: - blockLister = block.NewRecursiveLister(logger, insBkt) - default: - return errors.Errorf("unknown sync strategy %s", conf.blockListStrategy) - } - baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) - if err != nil { - return errors.Wrap(err, "create meta fetcher") - } - enableVerticalCompaction := conf.enableVerticalCompaction - dedupReplicaLabels := strutil.ParseFlagLabels(conf.dedupReplicaLabels) - if len(dedupReplicaLabels) > 0 { - enableVerticalCompaction = true - level.Info(logger).Log( - "msg", "deduplication.replica-label specified, enabling vertical compaction", "dedupReplicaLabels", strings.Join(dedupReplicaLabels, ","), - ) - } - if enableVerticalCompaction { - level.Info(logger).Log( - "msg", "vertical compaction is enabled", "compact.enable-vertical-compaction", fmt.Sprintf("%v", conf.enableVerticalCompaction), - ) - } - var ( - api = blocksAPI.NewBlocksAPI(logger, conf.webConf.disableCORS, conf.label, flagsMap, insBkt) - sy *compact.Syncer - ) - { - filters := []block.MetadataFilter{ - timePartitionMetaFilter, - labelShardedMetaFilter, - consistencyDelayMetaFilter, - ignoreDeletionMarkFilter, - block.NewReplicaLabelRemover(logger, dedupReplicaLabels), - duplicateBlocksFilter, - noCompactMarkerFilter, + // Create bucket for this tenant + if tenantPrefix != "" { + level.Info(logger).Log("msg", "creating compactor bucket with tenant prefix", "prefix", "v1/raw/"+tenantPrefix) } - if !conf.disableDownsampling { - filters = append(filters, noDownsampleMarkerFilter) + bkt, err := client.NewBucket(logger, tenantConfYaml, component.String(), nil) + if conf.enableFolderDeletion { + bkt, err = block.WrapWithAzDataLakeSdk(logger, tenantConfYaml, bkt) + if tenantPrefix != "" { + level.Info(logger).Log("msg", "azdatalake sdk wrapper enabled for tenant", "prefix", "v1/raw/"+tenantPrefix, "name", bkt.Name()) + } else { + level.Info(logger).Log("msg", "azdatalake sdk 
wrapper enabled", "name", bkt.Name()) + } + } + if err != nil { + return err } - // Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability. - cf := baseMetaFetcher.NewMetaFetcher( - extprom.WrapRegistererWithPrefix("thanos_", reg), filters) - cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { - api.SetLoaded(blocks, err) - }) - // Still use blockViewerSyncBlockTimeout to retain original behavior before this upstream change: - // https://github.com/databricks/thanos/commit/ab43b2b20cb42eca2668824a4084307216c6da2e#diff-6c2257b871fd1196514f664bc7e44cb21681215e0929710d0ad5ceea90b8e122R294 - // Otherwise Azure won't work due to its high latency - var syncMetasTimeout = conf.blockViewerSyncBlockTimeout - if !conf.wait { - syncMetasTimeout = 0 + insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) + + // Create tenant-specific logger + tenantLogger := logger + if tenantPrefix != "" { + tenantLogger = log.With(logger, "tenant_prefix", tenantPrefix) } - sy, err = compact.NewMetaSyncer( - logger, - reg, - insBkt, - cf, - duplicateBlocksFilter, - ignoreDeletionMarkFilter, - compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), - compactMetrics.garbageCollectedBlocks, - syncMetasTimeout, - ) + + relabelContentYaml, err := conf.selectorRelabelConf.Content() if err != nil { - return errors.Wrap(err, "create syncer") + return errors.Wrap(err, "get content of relabel configuration") } - } - levels, err := compactions.levels(conf.maxCompactionLevel) - if err != nil { - return errors.Wrap(err, "get compaction levels") - } + relabelConfig, err := block.ParseRelabelConfig(relabelContentYaml, block.SelectorSupportedRelabelActions) + if err != nil { + return err + } - if conf.maxCompactionLevel < compactions.maxLevel() { - level.Warn(logger).Log("msg", "Max compaction level is lower than should be", "current", conf.maxCompactionLevel, "default", compactions.maxLevel()) - } + // Ensure we close up everything properly. + defer func() { + if err != nil { + runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") + } + }() + + // While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter. + // The delay of deleteDelay/2 is added to ensure we fetch blocks that are meant to be deleted but do not have a replacement yet. + // This is to make sure compactor will not accidentally perform compactions with gap instead. 
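+		// The per-tenant filters below operate on the tenant-scoped bucket (insBkt), keeping deletion
+		// marks, no-compact/no-downsample marks and metadata syncing isolated to this tenant's prefix.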
+ ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(logger, insBkt, deleteDelay/2, conf.blockMetaFetchConcurrency) + duplicateBlocksFilter := block.NewDeduplicateFilter(conf.blockMetaFetchConcurrency) + noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) + noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) + labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig) + consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, extprom.WrapRegistererWithPrefix("thanos_", reg)) + timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) + + var blockLister block.Lister + switch syncStrategy(conf.blockListStrategy) { + case concurrentDiscovery: + blockLister = block.NewConcurrentLister(logger, insBkt) + case recursiveDiscovery: + blockLister = block.NewRecursiveLister(logger, insBkt) + default: + return errors.Errorf("unknown sync strategy %s", conf.blockListStrategy) + } + baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) + if err != nil { + return errors.Wrap(err, "create meta fetcher") + } - ctx, cancel := context.WithCancel(context.Background()) - ctx = tracing.ContextWithTracer(ctx, tracer) - ctx = objstoretracing.ContextWithTracer(ctx, tracer) // objstore tracing uses a different tracer key in context. + enableVerticalCompaction := conf.enableVerticalCompaction + dedupReplicaLabels := strutil.ParseFlagLabels(conf.dedupReplicaLabels) + if len(dedupReplicaLabels) > 0 { + enableVerticalCompaction = true + level.Info(tenantLogger).Log( + "msg", "deduplication.replica-label specified, enabling vertical compaction", "dedupReplicaLabels", strings.Join(dedupReplicaLabels, ","), + ) + } + if enableVerticalCompaction { + level.Info(tenantLogger).Log( + "msg", "vertical compaction is enabled", "compact.enable-vertical-compaction", fmt.Sprintf("%v", conf.enableVerticalCompaction), + ) + } + var ( + api = blocksAPI.NewBlocksAPI(logger, conf.webConf.disableCORS, conf.label, flagsMap, insBkt) + sy *compact.Syncer + ) + { + filters := []block.MetadataFilter{ + timePartitionMetaFilter, + labelShardedMetaFilter, + consistencyDelayMetaFilter, + ignoreDeletionMarkFilter, + block.NewReplicaLabelRemover(logger, dedupReplicaLabels), + duplicateBlocksFilter, + noCompactMarkerFilter, + } + if !conf.disableDownsampling { + filters = append(filters, noDownsampleMarkerFilter) + } + // Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability. 
+ cf := baseMetaFetcher.NewMetaFetcher( + extprom.WrapRegistererWithPrefix("thanos_", reg), filters) + cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { + api.SetLoaded(blocks, err) + }) - defer func() { - if rerr != nil { - cancel() + // Still use blockViewerSyncBlockTimeout to retain original behavior before this upstream change: + // https://github.com/databricks/thanos/commit/ab43b2b20cb42eca2668824a4084307216c6da2e#diff-6c2257b871fd1196514f664bc7e44cb21681215e0929710d0ad5ceea90b8e122R294 + // Otherwise Azure won't work due to its high latency + var syncMetasTimeout = conf.blockViewerSyncBlockTimeout + if !conf.wait { + syncMetasTimeout = 0 + } + sy, err = compact.NewMetaSyncer( + logger, + reg, + insBkt, + cf, + duplicateBlocksFilter, + ignoreDeletionMarkFilter, + compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), + compactMetrics.garbageCollectedBlocks, + syncMetasTimeout, + ) + if err != nil { + return errors.Wrap(err, "create syncer") + } } - }() - var mergeFunc storage.VerticalChunkSeriesMergeFunc - switch conf.dedupFunc { - case compact.DedupAlgorithmPenalty: - mergeFunc = dedup.NewChunkSeriesMerger() + levels, err := compactions.levels(conf.maxCompactionLevel) + if err != nil { + return errors.Wrap(err, "get compaction levels") + } - if len(dedupReplicaLabels) == 0 { - return errors.New("penalty based deduplication needs at least one replica label specified") + if conf.maxCompactionLevel < compactions.maxLevel() { + level.Warn(tenantLogger).Log("msg", "Max compaction level is lower than should be", "current", conf.maxCompactionLevel, "default", compactions.maxLevel()) } - case "": - mergeFunc = storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge) - default: - return errors.Errorf("unsupported deduplication func, got %s", conf.dedupFunc) - } + ctx, cancel := context.WithCancel(context.Background()) + ctx = tracing.ContextWithTracer(ctx, tracer) + ctx = objstoretracing.ContextWithTracer(ctx, tracer) // objstore tracing uses a different tracer key in context. - // Instantiate the compactor with different time slices. Timestamps in TSDB - // are in milliseconds. 
- comp, err := tsdb.NewLeveledCompactor(ctx, reg, logger, levels, downsample.NewPool(), mergeFunc) - if err != nil { - return errors.Wrap(err, "create compactor") - } + defer func() { + if rerr != nil { + cancel() + } + }() - var ( - compactDir = path.Join(conf.dataDir, "compact") - downsamplingDir = path.Join(conf.dataDir, "downsample") - ) + var mergeFunc storage.VerticalChunkSeriesMergeFunc + switch conf.dedupFunc { + case compact.DedupAlgorithmPenalty: + mergeFunc = dedup.NewChunkSeriesMerger() - if err := os.MkdirAll(compactDir, os.ModePerm); err != nil { - return errors.Wrap(err, "create working compact directory") - } + if len(dedupReplicaLabels) == 0 { + return errors.New("penalty based deduplication needs at least one replica label specified") + } + case "": + mergeFunc = storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge) - if err := os.MkdirAll(downsamplingDir, os.ModePerm); err != nil { - return errors.Wrap(err, "create working downsample directory") - } + default: + return errors.Errorf("unsupported deduplication func, got %s", conf.dedupFunc) + } - grouper := compact.NewDefaultGrouper( - logger, - insBkt, - conf.acceptMalformedIndex, - enableVerticalCompaction, - reg, - compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), - compactMetrics.garbageCollectedBlocks, - compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason), - metadata.HashFunc(conf.hashFunc), - conf.blockFilesConcurrency, - conf.compactBlocksFetchConcurrency, - ) - var planner compact.Planner - - tsdbPlanner := compact.NewPlanner(logger, levels, noCompactMarkerFilter) - largeIndexFilterPlanner := compact.WithLargeTotalIndexSizeFilter( - tsdbPlanner, - insBkt, - int64(conf.maxBlockIndexSize), - compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.IndexSizeExceedingNoCompactReason), - ) - if enableVerticalCompaction { - planner = compact.WithVerticalCompactionDownsampleFilter(largeIndexFilterPlanner, insBkt, compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.DownsampleVerticalCompactionNoCompactReason)) - } else { - planner = largeIndexFilterPlanner - } - blocksCleaner := compact.NewBlocksCleaner(logger, insBkt, ignoreDeletionMarkFilter, deleteDelay, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) - compactor, err := compact.NewBucketCompactorWithCheckerAndCallback( - logger, - sy, - grouper, - planner, - comp, - compact.DefaultBlockDeletableChecker{}, - compact.NewOverlappingCompactionLifecycleCallback(reg, logger, conf.enableOverlappingRemoval), - compactDir, - insBkt, - conf.compactionConcurrency, - conf.skipBlockWithOutOfOrderChunks, - ) - if err != nil { - return errors.Wrap(err, "create bucket compactor") - } + // Instantiate the compactor with different time slices. Timestamps in TSDB + // are in milliseconds. 
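+			// A separate leveled compactor is created for each tenant iteration and logs through the
+			// tenant-scoped logger, so its output can be attributed to a single tenant prefix.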
+ comp, err := tsdb.NewLeveledCompactor(ctx, reg, tenantLogger, levels, downsample.NewPool(), mergeFunc) + if err != nil { + return errors.Wrap(err, "create compactor") + } - retentionByResolution := map[compact.ResolutionLevel]time.Duration{ - compact.ResolutionLevelRaw: time.Duration(conf.retentionRaw), - compact.ResolutionLevel5m: time.Duration(conf.retentionFiveMin), - compact.ResolutionLevel1h: time.Duration(conf.retentionOneHr), - } + var ( + compactDir = path.Join(conf.dataDir, "compact") + downsamplingDir = path.Join(conf.dataDir, "downsample") + ) - if retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() != 0 { - // If downsampling is enabled, error if raw retention is not sufficient for downsampling to occur (upper bound 10 days for 1h resolution) - if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() < downsample.ResLevel1DownsampleRange { - return errors.New("raw resolution must be higher than the minimum block size after which 5m resolution downsampling will occur (40 hours)") + if err := os.MkdirAll(compactDir, os.ModePerm); err != nil { + return errors.Wrap(err, "create working compact directory") } - level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw]) - } - if retentionByResolution[compact.ResolutionLevel5m].Milliseconds() != 0 { - // If retention is lower than minimum downsample range, then no downsampling at this resolution will be persisted - if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Milliseconds() < downsample.ResLevel2DownsampleRange { - return errors.New("5m resolution retention must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") + + if err := os.MkdirAll(downsamplingDir, os.ModePerm); err != nil { + return errors.Wrap(err, "create working downsample directory") } - level.Info(logger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m]) - } - if retentionByResolution[compact.ResolutionLevel1h].Milliseconds() != 0 { - level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) - } - retentionByTenant, err := compact.ParesRetentionPolicyByTenant(logger, *conf.retentionTenants) - if err != nil { - level.Error(logger).Log("msg", "failed to parse retention policy by tenant", "err", err) - return err - } + grouper := compact.NewDefaultGrouper( + logger, + insBkt, + conf.acceptMalformedIndex, + enableVerticalCompaction, + reg, + compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), + compactMetrics.garbageCollectedBlocks, + compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason), + metadata.HashFunc(conf.hashFunc), + conf.blockFilesConcurrency, + conf.compactBlocksFetchConcurrency, + ) + var planner compact.Planner - var cleanMtx sync.Mutex - // TODO(GiedriusS): we could also apply retention policies here but the logic would be a bit more complex. 
- cleanPartialMarked := func(progress *compact.Progress) error { - cleanMtx.Lock() - defer cleanMtx.Unlock() - defer progress.Idle() - progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "syncing metas") + tsdbPlanner := compact.NewPlanner(logger, levels, noCompactMarkerFilter) + largeIndexFilterPlanner := compact.WithLargeTotalIndexSizeFilter( + tsdbPlanner, + insBkt, + int64(conf.maxBlockIndexSize), + compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.IndexSizeExceedingNoCompactReason), + ) + if enableVerticalCompaction { + planner = compact.WithVerticalCompactionDownsampleFilter(largeIndexFilterPlanner, insBkt, compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.DownsampleVerticalCompactionNoCompactReason)) + } else { + planner = largeIndexFilterPlanner } - - progress.Set(compact.CleanBlocks) - compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), insBkt, compactMetrics.partialUploadDeleteAttempts, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) - if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil { - return errors.Wrap(err, "cleaning marked blocks") + blocksCleaner := compact.NewBlocksCleaner(logger, insBkt, ignoreDeletionMarkFilter, deleteDelay, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) + compactor, err := compact.NewBucketCompactorWithCheckerAndCallback( + logger, + sy, + grouper, + planner, + comp, + compact.DefaultBlockDeletableChecker{}, + compact.NewOverlappingCompactionLifecycleCallback(reg, tenantLogger, conf.enableOverlappingRemoval), + compactDir, + insBkt, + conf.compactionConcurrency, + conf.skipBlockWithOutOfOrderChunks, + ) + if err != nil { + return errors.Wrap(err, "create bucket compactor") } - compactMetrics.cleanups.Inc() - return nil - } + retentionByResolution := map[compact.ResolutionLevel]time.Duration{ + compact.ResolutionLevelRaw: time.Duration(conf.retentionRaw), + compact.ResolutionLevel5m: time.Duration(conf.retentionFiveMin), + compact.ResolutionLevel1h: time.Duration(conf.retentionOneHr), + } - compactMainFn := func(progress *compact.Progress) error { - defer progress.Idle() - // this should happen before any compaction to remove unnecessary process on backlogs beyond retention. 
- if len(retentionByTenant) != 0 && len(sy.Metas()) == 0 { - level.Info(logger).Log("msg", "sync before tenant retention due to no blocks") - progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "sync before tenant retention") + if retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() != 0 { + // If downsampling is enabled, error if raw retention is not sufficient for downsampling to occur (upper bound 10 days for 1h resolution) + if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() < downsample.ResLevel1DownsampleRange { + return errors.New("raw resolution must be higher than the minimum block size after which 5m resolution downsampling will occur (40 hours)") } + level.Info(tenantLogger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw]) } - - progress.Set(compact.ApplyRetention) - if err := compact.ApplyRetentionPolicyByTenant(ctx, logger, insBkt, sy.Metas(), retentionByTenant, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, metadata.TenantRetentionExpired)); err != nil { - return errors.Wrap(err, "retention by tenant failed") + if retentionByResolution[compact.ResolutionLevel5m].Milliseconds() != 0 { + // If retention is lower than minimum downsample range, then no downsampling at this resolution will be persisted + if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Milliseconds() < downsample.ResLevel2DownsampleRange { + return errors.New("5m resolution retention must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") + } + level.Info(tenantLogger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m]) + } + if retentionByResolution[compact.ResolutionLevel1h].Milliseconds() != 0 { + level.Info(tenantLogger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) } - if err := compactor.Compact(ctx, progress); err != nil { - return errors.Wrap(err, "whole compaction error") + retentionByTenant, err := compact.ParesRetentionPolicyByTenant(logger, *conf.retentionTenants) + if err != nil { + level.Error(tenantLogger).Log("msg", "failed to parse retention policy by tenant", "err", err) + return err } - if !conf.disableDownsampling { - // After all compactions are done, work down the downsampling backlog. - // We run two passes of this to ensure that the 1h downsampling is generated - // for 5m downsamplings created in the first run. - level.Info(logger).Log("msg", "start first pass of downsampling") + var cleanMtx sync.Mutex + // TODO(GiedriusS): we could also apply retention policies here but the logic would be a bit more complex. 
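+		// cleanPartialMarked only touches this tenant's bucket; partial uploads and deletion-marked
+		// blocks under other tenant prefixes are cleaned by their own iterations of the surrounding loop.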
+ cleanPartialMarked := func(progress *compact.Progress) error { + cleanMtx.Lock() + defer cleanMtx.Unlock() + defer progress.Idle() progress.Set(compact.SyncMeta) if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "sync before first pass of downsampling") - } - progress.Set(compact.DownSampling) - filteredMetas := sy.Metas() - noDownsampleBlocks := noDownsampleMarkerFilter.NoDownsampleMarkedBlocks() - for ul := range noDownsampleBlocks { - delete(filteredMetas, ul) + return errors.Wrap(err, "syncing metas") } - for _, meta := range filteredMetas { - resolutionLabel := meta.Thanos.ResolutionString() - downsampleMetrics.downsamples.WithLabelValues(resolutionLabel) - downsampleMetrics.downsampleFailures.WithLabelValues(resolutionLabel) + progress.Set(compact.CleanBlocks) + compact.BestEffortCleanAbortedPartialUploads(ctx, tenantLogger, sy.Partial(), insBkt, compactMetrics.partialUploadDeleteAttempts, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) + if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil { + return errors.Wrap(err, "cleaning marked blocks") } + compactMetrics.cleanups.Inc() - if err := downsampleBucket( - ctx, - logger, - downsampleMetrics, - insBkt, - filteredMetas, - downsamplingDir, - conf.downsampleConcurrency, - conf.blockFilesConcurrency, - metadata.HashFunc(conf.hashFunc), - conf.acceptMalformedIndex, - ); err != nil { - return errors.Wrap(err, "first pass of downsampling failed") - } + return nil + } - level.Info(logger).Log("msg", "start second pass of downsampling") - progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "sync before second pass of downsampling") + compactMainFn := func(progress *compact.Progress) error { + defer progress.Idle() + // this should happen before any compaction to remove unnecessary process on backlogs beyond retention. + if len(retentionByTenant) != 0 && len(sy.Metas()) == 0 { + level.Info(tenantLogger).Log("msg", "sync before tenant retention due to no blocks") + progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + return errors.Wrap(err, "sync before tenant retention") + } } - // Regenerate the filtered list of blocks after the sync, - // to include the blocks created by the first pass. - filteredMetas = sy.Metas() - noDownsampleBlocks = noDownsampleMarkerFilter.NoDownsampleMarkedBlocks() - for ul := range noDownsampleBlocks { - delete(filteredMetas, ul) + progress.Set(compact.ApplyRetention) + if err := compact.ApplyRetentionPolicyByTenant(ctx, tenantLogger, insBkt, sy.Metas(), retentionByTenant, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, metadata.TenantRetentionExpired)); err != nil { + return errors.Wrap(err, "retention by tenant failed") } - if err := downsampleBucket( - ctx, - logger, - downsampleMetrics, - insBkt, - filteredMetas, - downsamplingDir, - conf.downsampleConcurrency, - conf.blockFilesConcurrency, - metadata.HashFunc(conf.hashFunc), - conf.acceptMalformedIndex, - ); err != nil { - return errors.Wrap(err, "second pass of downsampling failed") + if err := compactor.Compact(ctx, progress); err != nil { + return errors.Wrap(err, "whole compaction error") } - level.Info(logger).Log("msg", "downsampling iterations done") - } else { - level.Info(logger).Log("msg", "downsampling was explicitly disabled") - } + if !conf.disableDownsampling { + // After all compactions are done, work down the downsampling backlog. 
+ // We run two passes of this to ensure that the 1h downsampling is generated + // for 5m downsamplings created in the first run. + level.Info(tenantLogger).Log("msg", "start first pass of downsampling") + progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + return errors.Wrap(err, "sync before first pass of downsampling") + } + progress.Set(compact.DownSampling) + filteredMetas := sy.Metas() + noDownsampleBlocks := noDownsampleMarkerFilter.NoDownsampleMarkedBlocks() + for ul := range noDownsampleBlocks { + delete(filteredMetas, ul) + } - // TODO(bwplotka): Find a way to avoid syncing if no op was done. - progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "sync before retention") - } + for _, meta := range filteredMetas { + resolutionLabel := meta.Thanos.ResolutionString() + downsampleMetrics.downsamples.WithLabelValues(resolutionLabel) + downsampleMetrics.downsampleFailures.WithLabelValues(resolutionLabel) + } - progress.Set(compact.ApplyRetention) - if err := compact.ApplyRetentionPolicyByResolution(ctx, logger, insBkt, sy.Metas(), retentionByResolution, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, "")); err != nil { - return errors.Wrap(err, "retention failed") - } + if err := downsampleBucket( + ctx, + logger, + downsampleMetrics, + insBkt, + filteredMetas, + downsamplingDir, + conf.downsampleConcurrency, + conf.blockFilesConcurrency, + metadata.HashFunc(conf.hashFunc), + conf.acceptMalformedIndex, + ); err != nil { + return errors.Wrap(err, "first pass of downsampling failed") + } - return cleanPartialMarked(progress) - } + level.Info(tenantLogger).Log("msg", "start second pass of downsampling") + progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + return errors.Wrap(err, "sync before second pass of downsampling") + } - g.Add(func() error { - defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") + // Regenerate the filtered list of blocks after the sync, + // to include the blocks created by the first pass. + filteredMetas = sy.Metas() + noDownsampleBlocks = noDownsampleMarkerFilter.NoDownsampleMarkedBlocks() + for ul := range noDownsampleBlocks { + delete(filteredMetas, ul) + } - if !conf.wait { - return compactMainFn(progressRegistry.Get(compact.Main)) - } + if err := downsampleBucket( + ctx, + logger, + downsampleMetrics, + insBkt, + filteredMetas, + downsamplingDir, + conf.downsampleConcurrency, + conf.blockFilesConcurrency, + metadata.HashFunc(conf.hashFunc), + conf.acceptMalformedIndex, + ); err != nil { + return errors.Wrap(err, "second pass of downsampling failed") + } - // --wait=true is specified. - return runutil.Repeat(conf.waitInterval, ctx.Done(), func() error { - err := compactMainFn(progressRegistry.Get(compact.Main)) - if err == nil { - compactMetrics.iterations.Inc() - return nil + level.Info(tenantLogger).Log("msg", "downsampling iterations done") + } else { + level.Info(tenantLogger).Log("msg", "downsampling was explicitly disabled") } - // The HaltError type signals that we hit a critical bug and should block - // for investigation. You should alert on this being halted. - if compact.IsHaltError(err) { - if conf.haltOnError { - level.Error(logger).Log("msg", "critical error detected; halting", "err", err) - compactMetrics.halted.Set(1) - select {} - } else { - return errors.Wrap(err, "critical error detected") - } + // TODO(bwplotka): Find a way to avoid syncing if no op was done. 
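+			// Sync metadata once more so the retention step below also sees blocks produced by the
+			// compaction and downsampling passes above.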
+ progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + return errors.Wrap(err, "sync before retention") } - // The RetryError signals that we hit an retriable error (transient error, no connection). - // You should alert on this being triggered too frequently. - if compact.IsRetryError(err) { - level.Error(logger).Log("msg", "retriable error", "err", err) - compactMetrics.retried.Inc() - // TODO(bplotka): use actual "retry()" here instead of waiting 5 minutes? - return nil + progress.Set(compact.ApplyRetention) + if err := compact.ApplyRetentionPolicyByResolution(ctx, tenantLogger, insBkt, sy.Metas(), retentionByResolution, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, "")); err != nil { + return errors.Wrap(err, "retention failed") } - return errors.Wrap(err, "error executing compaction") - }) - }, func(error) { - cancel() - }) + return cleanPartialMarked(progress) + } + + // For backwards compatibility, use the existing single-goroutine approach + // The bucketsToCompact[0] is already assigned to insBkt above, and all setup used insBkt + // So the existing compactMainFn will work for the first/only bucket + g.Add(func() error { + defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") - if conf.wait { - if !conf.disableWeb { - r := route.New() + if !conf.wait { + return compactMainFn(progressRegistry.Get(compact.Main)) + } - ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) + // --wait=true is specified. + return runutil.Repeat(conf.waitInterval, ctx.Done(), func() error { + err := compactMainFn(progressRegistry.Get(compact.Main)) + if err == nil { + compactMetrics.iterations.Inc() + return nil + } - global := ui.NewBucketUI(logger, conf.webConf.externalPrefix, conf.webConf.prefixHeaderName, component) - global.Register(r, ins) + // The HaltError type signals that we hit a critical bug and should block + // for investigation. You should alert on this being halted. + if compact.IsHaltError(err) { + if conf.haltOnError { + level.Error(tenantLogger).Log("msg", "critical error detected; halting", "err", err) + compactMetrics.halted.Set(1) + select {} + } else { + return errors.Wrap(err, "critical error detected") + } + } - // Configure Request Logging for HTTP calls. - opts := []logging.Option{logging.WithDecider(func(_ string, _ error) logging.Decision { - return logging.NoLogCall - })} - logMiddleware := logging.NewHTTPServerMiddleware(logger, opts...) - api.Register(r.WithPrefix("/api/v1"), tracer, logger, ins, logMiddleware) + // The RetryError signals that we hit an retriable error (transient error, no connection). + // You should alert on this being triggered too frequently. + if compact.IsRetryError(err) { + level.Error(tenantLogger).Log("msg", "retriable error", "err", err) + compactMetrics.retried.Inc() + // TODO(bplotka): use actual "retry()" here instead of waiting 5 minutes? + return nil + } - // Separate fetcher for global view. - // TODO(bwplotka): Allow Bucket UI to visualize the state of the block as well. 
- f := baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", reg), nil, "component", "globalBucketUI") - f.UpdateOnChange(func(blocks []metadata.Meta, err error) { - api.SetGlobal(blocks, err) + return errors.Wrap(err, "error executing compaction") }) + }, func(error) { + cancel() + }) - srv.Handle("/", r) - - g.Add(func() error { - iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout) - _, _, _ = f.Fetch(iterCtx) - iterCancel() + if conf.wait { + if !conf.disableWeb { + r := route.New() - // For /global state make sure to fetch periodically. - return runutil.Repeat(conf.blockViewerSyncBlockInterval, ctx.Done(), func() error { - return runutil.RetryWithLog(logger, time.Minute, ctx.Done(), func() error { - progress := progressRegistry.Get(compact.Web) - defer progress.Idle() - iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout) - defer iterCancel() - progress.Set(compact.SyncMeta) - _, _, err := f.Fetch(iterCtx) - return err - }) - }) - }, func(error) { - cancel() - }) - } + ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) - // Periodically remove partial blocks and blocks marked for deletion - // since one iteration potentially could take a long time. - if conf.cleanupBlocksInterval > 0 { - g.Add(func() error { - return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), func() error { - err := cleanPartialMarked(progressRegistry.Get(compact.Cleanup)) - if err != nil && compact.IsRetryError(err) { - // The RetryError signals that we hit an retriable error (transient error, no connection). - // You should alert on this being triggered too frequently. - level.Error(logger).Log("msg", "retriable error", "err", err) - compactMetrics.retried.Inc() + global := ui.NewBucketUI(logger, conf.webConf.externalPrefix, conf.webConf.prefixHeaderName, component) + global.Register(r, ins) - return nil - } + // Configure Request Logging for HTTP calls. + opts := []logging.Option{logging.WithDecider(func(_ string, _ error) logging.Decision { + return logging.NoLogCall + })} + logMiddleware := logging.NewHTTPServerMiddleware(logger, opts...) + api.Register(r.WithPrefix("/api/v1"), tracer, tenantLogger, ins, logMiddleware) - return err + // Separate fetcher for global view. + // TODO(bwplotka): Allow Bucket UI to visualize the state of the block as well. + f := baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", reg), nil, "component", "globalBucketUI") + f.UpdateOnChange(func(blocks []metadata.Meta, err error) { + api.SetGlobal(blocks, err) }) - }, func(error) { - cancel() - }) - } - // Periodically calculate the progress of compaction, downsampling and retention. - if conf.progressCalculateInterval > 0 { - g.Add(func() error { - ps := compact.NewCompactionProgressCalculator(reg, tsdbPlanner) - rs := compact.NewRetentionProgressCalculator(reg, retentionByResolution) - var ds *compact.DownsampleProgressCalculator - if !conf.disableDownsampling { - ds = compact.NewDownsampleProgressCalculator(reg) - } + srv.Handle("/", r) + + g.Add(func() error { + iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout) + _, _, _ = f.Fetch(iterCtx) + iterCancel() + + // For /global state make sure to fetch periodically. 
+ return runutil.Repeat(conf.blockViewerSyncBlockInterval, ctx.Done(), func() error { + return runutil.RetryWithLog(logger, time.Minute, ctx.Done(), func() error { + progress := progressRegistry.Get(compact.Web) + defer progress.Idle() + iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout) + defer iterCancel() + progress.Set(compact.SyncMeta) + _, _, err := f.Fetch(iterCtx) + return err + }) + }) + }, func(error) { + cancel() + }) + } - return runutil.Repeat(conf.progressCalculateInterval, ctx.Done(), func() error { - progress := progressRegistry.Get(compact.Calculate) - defer progress.Idle() - progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - // The RetryError signals that we hit an retriable error (transient error, no connection). - // You should alert on this being triggered too frequently. - if compact.IsRetryError(err) { - level.Error(logger).Log("msg", "retriable error", "err", err) + // Periodically remove partial blocks and blocks marked for deletion + // since one iteration potentially could take a long time. + if conf.cleanupBlocksInterval > 0 { + g.Add(func() error { + return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), func() error { + err := cleanPartialMarked(progressRegistry.Get(compact.Cleanup)) + if err != nil && compact.IsRetryError(err) { + // The RetryError signals that we hit an retriable error (transient error, no connection). + // You should alert on this being triggered too frequently. + level.Error(tenantLogger).Log("msg", "retriable error", "err", err) compactMetrics.retried.Inc() return nil } - return errors.Wrapf(err, "could not sync metas") - } + return err + }) + }, func(error) { + cancel() + }) + } - metas := sy.Metas() - progress.Set(compact.Grouping) - groups, err := grouper.Groups(metas) - if err != nil { - return errors.Wrapf(err, "could not group metadata for compaction") - } - progress.Set(compact.CalculateProgress) - if err = ps.ProgressCalculate(ctx, groups); err != nil { - return errors.Wrapf(err, "could not calculate compaction progress") + // Periodically calculate the progress of compaction, downsampling and retention. + if conf.progressCalculateInterval > 0 { + g.Add(func() error { + ps := compact.NewCompactionProgressCalculator(reg, tsdbPlanner) + rs := compact.NewRetentionProgressCalculator(reg, retentionByResolution) + var ds *compact.DownsampleProgressCalculator + if !conf.disableDownsampling { + ds = compact.NewDownsampleProgressCalculator(reg) } - progress.Set(compact.Grouping) - retGroups, err := grouper.Groups(metas) - if err != nil { - return errors.Wrapf(err, "could not group metadata for retention") - } + return runutil.Repeat(conf.progressCalculateInterval, ctx.Done(), func() error { + progress := progressRegistry.Get(compact.Calculate) + defer progress.Idle() + progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + // The RetryError signals that we hit an retriable error (transient error, no connection). + // You should alert on this being triggered too frequently. 
+ if compact.IsRetryError(err) { + level.Error(tenantLogger).Log("msg", "retriable error", "err", err) + compactMetrics.retried.Inc() - progress.Set(compact.CalculateProgress) - if err = rs.ProgressCalculate(ctx, retGroups); err != nil { - return errors.Wrapf(err, "could not calculate retention progress") - } + return nil + } - if !conf.disableDownsampling { + return errors.Wrapf(err, "could not sync metas") + } + + metas := sy.Metas() progress.Set(compact.Grouping) - groups, err = grouper.Groups(metas) + groups, err := grouper.Groups(metas) if err != nil { - return errors.Wrapf(err, "could not group metadata into downsample groups") + return errors.Wrapf(err, "could not group metadata for compaction") } progress.Set(compact.CalculateProgress) - if err := ds.ProgressCalculate(ctx, groups); err != nil { - return errors.Wrapf(err, "could not calculate downsampling progress") + if err = ps.ProgressCalculate(ctx, groups); err != nil { + return errors.Wrapf(err, "could not calculate compaction progress") } - } - return nil + progress.Set(compact.Grouping) + retGroups, err := grouper.Groups(metas) + if err != nil { + return errors.Wrapf(err, "could not group metadata for retention") + } + + progress.Set(compact.CalculateProgress) + if err = rs.ProgressCalculate(ctx, retGroups); err != nil { + return errors.Wrapf(err, "could not calculate retention progress") + } + + if !conf.disableDownsampling { + progress.Set(compact.Grouping) + groups, err = grouper.Groups(metas) + if err != nil { + return errors.Wrapf(err, "could not group metadata into downsample groups") + } + progress.Set(compact.CalculateProgress) + if err := ds.ProgressCalculate(ctx, groups); err != nil { + return errors.Wrapf(err, "could not calculate downsampling progress") + } + } + + return nil + }) + }, func(err error) { + cancel() }) - }, func(err error) { - cancel() - }) + } } - } + + } // End of tenant prefix loop level.Info(logger).Log("msg", "starting compact node") statusProber.Ready() diff --git a/cmd/thanos/compact_test.go b/cmd/thanos/compact_test.go new file mode 100644 index 00000000000..851c62ab9b7 --- /dev/null +++ b/cmd/thanos/compact_test.go @@ -0,0 +1,284 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. 
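+//
+// These tests exercise the optional tenant_prefixes field of the bucket configuration and the
+// per-tenant bucket prefixes derived from it. Example configuration (see TestTenantPrefixesFromYAML
+// below):
+//
+//	type: FILESYSTEM
+//	config:
+//	  directory: /tmp/test
+//	  tenant_prefixes:
+//	    - tenant-alpha
+//	    - tenant-beta
+//
+// Each non-empty tenant prefix results in a compactor bucket rooted at "v1/raw/<tenant prefix>".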
+ +package main + +import ( + "testing" + + "github.com/efficientgo/core/testutil" + "github.com/go-kit/log" + "github.com/thanos-io/objstore/client" + "gopkg.in/yaml.v2" +) + +// TestAccessTenantPrefixes tests the accessTenantPrefixes function +func TestAccessTenantPrefixes(t *testing.T) { + tests := []struct { + name string + bucketConfig client.BucketConfig + expectedPrefixes []string + expectError bool + }{ + { + name: "bucket config with tenant_prefixes", + bucketConfig: client.BucketConfig{ + Type: client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": "/tmp/test", + "tenant_prefixes": []interface{}{"tenant1", "tenant2", "tenant3"}, + }, + }, + expectedPrefixes: []string{"tenant1", "tenant2", "tenant3"}, + expectError: false, + }, + { + name: "bucket config without tenant_prefixes", + bucketConfig: client.BucketConfig{ + Type: client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": "/tmp/test", + }, + }, + expectedPrefixes: nil, + expectError: false, + }, + { + name: "bucket config with empty tenant_prefixes", + bucketConfig: client.BucketConfig{ + Type: client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": "/tmp/test", + "tenant_prefixes": []interface{}{}, + }, + }, + expectedPrefixes: []string{}, + expectError: false, + }, + { + name: "bucket config with single tenant_prefix", + bucketConfig: client.BucketConfig{ + Type: client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": "/tmp/test", + "tenant_prefixes": []interface{}{"tenant-a"}, + }, + }, + expectedPrefixes: []string{"tenant-a"}, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + prefixes, err := accessTenantPrefixes(tt.bucketConfig) + + if tt.expectError { + testutil.NotOk(t, err) + } else { + testutil.Ok(t, err) + testutil.Equals(t, tt.expectedPrefixes, prefixes) + } + }) + } +} + +// TestTenantBucketConfigMarshaling tests YAML marshaling/unmarshaling of TenantBucketConfig +func TestTenantBucketConfigMarshaling(t *testing.T) { + tests := []struct { + name string + config TenantBucketConfig + expectedPrefixes []string + }{ + { + name: "empty tenant prefixes", + config: TenantBucketConfig{ + TenantPrefixes: []string{}, + }, + expectedPrefixes: []string{}, + }, + { + name: "single tenant prefix", + config: TenantBucketConfig{ + TenantPrefixes: []string{"tenant-a"}, + }, + expectedPrefixes: []string{"tenant-a"}, + }, + { + name: "multiple tenant prefixes", + config: TenantBucketConfig{ + TenantPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, + }, + expectedPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Marshal to YAML + yamlBytes, err := yaml.Marshal(tt.config) + testutil.Ok(t, err) + + // Unmarshal back + var unmarshaledConfig TenantBucketConfig + err = yaml.Unmarshal(yamlBytes, &unmarshaledConfig) + testutil.Ok(t, err) + + // Verify prefixes are preserved + testutil.Equals(t, tt.expectedPrefixes, unmarshaledConfig.TenantPrefixes) + }) + } +} + +// TestTenantPrefixBucketCreation tests that buckets are created correctly with tenant prefixes +func TestTenantPrefixBucketCreation(t *testing.T) { + tests := []struct { + name string + tenantPrefixes []string + expectedPrefixes []string + }{ + { + name: "single-tenant mode (no prefixes)", + tenantPrefixes: []string{}, + expectedPrefixes: []string{""}, + }, + { + name: "multi-tenant mode with one tenant", + tenantPrefixes: []string{"tenant1"}, + expectedPrefixes: 
[]string{"v1/raw/tenant1"}, + }, + { + name: "multi-tenant mode with multiple tenants", + tenantPrefixes: []string{"tenant1", "tenant2", "tenant3"}, + expectedPrefixes: []string{"v1/raw/tenant1", "v1/raw/tenant2", "v1/raw/tenant3"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create base bucket config + baseBucketConf := client.BucketConfig{ + Type: client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": t.TempDir(), + }, + Prefix: "", + } + + // Add tenant_prefixes if provided + if len(tt.tenantPrefixes) > 0 { + configMap := baseBucketConf.Config.(map[string]interface{}) + configMap["tenant_prefixes"] = tt.tenantPrefixes + baseBucketConf.Config = configMap + } + + baseYaml, err := yaml.Marshal(baseBucketConf) + testutil.Ok(t, err) + + var bucketConf client.BucketConfig + err = yaml.Unmarshal(baseYaml, &bucketConf) + testutil.Ok(t, err) + + // Get tenant prefixes (simulating what runCompact does) + tenantPrefixes := []string{""} + extractedPrefixes, err := accessTenantPrefixes(bucketConf) + if err == nil && len(extractedPrefixes) > 0 { + tenantPrefixes = extractedPrefixes + } + + // Simulate what the code does: create buckets for each tenant + var actualPrefixes []string + for _, tenantPrefix := range tenantPrefixes { + if tenantPrefix != "" { + bucketConf.Prefix = "v1/raw/" + tenantPrefix + } + actualPrefixes = append(actualPrefixes, bucketConf.Prefix) + + // Verify the bucket can be created + tenantYaml, err := yaml.Marshal(bucketConf) + testutil.Ok(t, err) + + bkt, err := client.NewBucket(log.NewNopLogger(), tenantYaml, "test", nil) + testutil.Ok(t, err) + testutil.Ok(t, bkt.Close()) + } + + testutil.Equals(t, tt.expectedPrefixes, actualPrefixes) + }) + } +} + +// TestBucketConfigPrefixPreservation tests that bucket config prefix is preserved +func TestBucketConfigPrefixPreservation(t *testing.T) { + tests := []struct { + name string + prefix string + }{ + { + name: "empty prefix", + prefix: "", + }, + { + name: "simple prefix", + prefix: "tenant1", + }, + { + name: "v1/raw prefix", + prefix: "v1/raw/tenant1", + }, + { + name: "hierarchical prefix", + prefix: "org/team/tenant1", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a bucket config with prefix + originalConf := client.BucketConfig{ + Type: client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": "/tmp/test", + }, + Prefix: tt.prefix, + } + + // Marshal to YAML + yamlBytes, err := yaml.Marshal(originalConf) + testutil.Ok(t, err) + + // Unmarshal back + var unmarshaledConf client.BucketConfig + err = yaml.Unmarshal(yamlBytes, &unmarshaledConf) + testutil.Ok(t, err) + + // Verify prefix is preserved + testutil.Equals(t, tt.prefix, unmarshaledConf.Prefix) + }) + } +} + +// TestTenantPrefixesFromYAML tests end-to-end tenant prefix extraction from YAML +func TestTenantPrefixesFromYAML(t *testing.T) { + yamlConfig := ` +type: FILESYSTEM +config: + directory: /tmp/test + tenant_prefixes: + - tenant-alpha + - tenant-beta + - tenant-gamma +prefix: "" +` + + var bucketConf client.BucketConfig + err := yaml.Unmarshal([]byte(yamlConfig), &bucketConf) + testutil.Ok(t, err) + + // Extract tenant prefixes + tenantPrefixes, err := accessTenantPrefixes(bucketConf) + testutil.Ok(t, err) + + expected := []string{"tenant-alpha", "tenant-beta", "tenant-gamma"} + testutil.Equals(t, expected, tenantPrefixes) +} From 3b7c3829fcd3ee239b3ad8f9ed5abf9f71a888ff Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Fri, 31 Oct 2025 00:55:17 +0000 
Subject: [PATCH 02/35] Pass linter test --- cmd/thanos/compact_test.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cmd/thanos/compact_test.go b/cmd/thanos/compact_test.go index 851c62ab9b7..a0868057d01 100644 --- a/cmd/thanos/compact_test.go +++ b/cmd/thanos/compact_test.go @@ -12,7 +12,6 @@ import ( "gopkg.in/yaml.v2" ) -// TestAccessTenantPrefixes tests the accessTenantPrefixes function func TestAccessTenantPrefixes(t *testing.T) { tests := []struct { name string @@ -83,7 +82,6 @@ func TestAccessTenantPrefixes(t *testing.T) { } } -// TestTenantBucketConfigMarshaling tests YAML marshaling/unmarshaling of TenantBucketConfig func TestTenantBucketConfigMarshaling(t *testing.T) { tests := []struct { name string @@ -130,7 +128,6 @@ func TestTenantBucketConfigMarshaling(t *testing.T) { } } -// TestTenantPrefixBucketCreation tests that buckets are created correctly with tenant prefixes func TestTenantPrefixBucketCreation(t *testing.T) { tests := []struct { name string @@ -208,7 +205,6 @@ func TestTenantPrefixBucketCreation(t *testing.T) { } } -// TestBucketConfigPrefixPreservation tests that bucket config prefix is preserved func TestBucketConfigPrefixPreservation(t *testing.T) { tests := []struct { name string @@ -258,7 +254,6 @@ func TestBucketConfigPrefixPreservation(t *testing.T) { } } -// TestTenantPrefixesFromYAML tests end-to-end tenant prefix extraction from YAML func TestTenantPrefixesFromYAML(t *testing.T) { yamlConfig := ` type: FILESYSTEM From c81c5c02983915139b344ab87133fffc1de6940b Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Fri, 31 Oct 2025 06:30:12 +0000 Subject: [PATCH 03/35] Using MultiTenancyBucketConfig --- cmd/thanos/compact.go | 42 ++--- cmd/thanos/compact_test.go | 324 ++++++++++++++++++++++--------------- 2 files changed, 213 insertions(+), 153 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index bd7ed311fc8..5e093929548 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -166,21 +166,11 @@ func newCompactMetrics(reg *prometheus.Registry, deleteDelay time.Duration) *com return m } -type TenantBucketConfig struct { - TenantPrefixes []string `yaml:"tenant_prefixes"` - // Other config fields can be added here if needed -} - -func accessTenantPrefixes(bucketConf client.BucketConfig) ([]string, error) { - configBytes, err := yaml.Marshal(bucketConf.Config) - if err != nil { - return nil, errors.Wrap(err, "failed to marshal bucket config") - } - var tenantBucketConfig TenantBucketConfig - if err := yaml.Unmarshal(configBytes, &tenantBucketConfig); err != nil { - return nil, errors.Wrap(err, "failed to unmarshal into tenant bucket config") - } - return tenantBucketConfig.TenantPrefixes, nil +type MultiTenancyBucketConfig struct { + Type client.ObjProvider `yaml:"type"` + Config interface{} `yaml:"config"` + Prefix string `yaml:"prefix" default:""` + TenantPrefixes []string `yaml:"tenant_prefixes"` // Example value: "v1/raw/tenant_a,v1/raw/tenant_b,v1/raw/tenant_c" } func runCompact( @@ -226,26 +216,28 @@ func runCompact( } // Determine tenant prefixes to use (if provided) - var bucketConf client.BucketConfig - if err := yaml.Unmarshal(confContentYaml, &bucketConf); err != nil { + var multiTenancyBucketConfig MultiTenancyBucketConfig + if err := yaml.Unmarshal(confContentYaml, &multiTenancyBucketConfig); err != nil { return errors.Wrap(err, "parse bucket config") } var tenantPrefixes []string - tenantPrefixes, err = accessTenantPrefixes(bucketConf) - if err != nil || len(tenantPrefixes) == 0 { + tenantPrefixes = 
multiTenancyBucketConfig.TenantPrefixes + if len(tenantPrefixes) == 0 { tenantPrefixes = []string{""} - level.Info(logger).Log("msg", "tenant prefixes not found in bucket config, assuming single-tenant mode") + level.Info(logger).Log("msg", "tenant prefixes not provided by init container, assuming single-tenant mode") } else { - level.Info(logger).Log("msg", "tenant prefixes found in bucket config, running in multi-tenant mode", "prefixes", strings.Join(tenantPrefixes, ",")) + level.Info(logger).Log("msg", "tenant prefixes found, running in multi-tenant mode", "prefixes", strings.Join(tenantPrefixes, ",")) } // Start compaction for each tenant // Each will get its own bucket created via client.NewBucket with the appropriate prefix for _, tenantPrefix := range tenantPrefixes { - if tenantPrefix != "" { - bucketConf.Prefix = "v1/raw/" + tenantPrefix + bucketConf := &client.BucketConfig{ + Type: multiTenancyBucketConfig.Type, + Config: multiTenancyBucketConfig.Config, + Prefix: multiTenancyBucketConfig.Prefix + tenantPrefix, } tenantConfYaml, err := yaml.Marshal(bucketConf) @@ -255,13 +247,13 @@ func runCompact( // Create bucket for this tenant if tenantPrefix != "" { - level.Info(logger).Log("msg", "creating compactor bucket with tenant prefix", "prefix", "v1/raw/"+tenantPrefix) + level.Info(logger).Log("msg", "creating compactor bucket with tenant prefix", "prefix", tenantPrefix) } bkt, err := client.NewBucket(logger, tenantConfYaml, component.String(), nil) if conf.enableFolderDeletion { bkt, err = block.WrapWithAzDataLakeSdk(logger, tenantConfYaml, bkt) if tenantPrefix != "" { - level.Info(logger).Log("msg", "azdatalake sdk wrapper enabled for tenant", "prefix", "v1/raw/"+tenantPrefix, "name", bkt.Name()) + level.Info(logger).Log("msg", "azdatalake sdk wrapper enabled for tenant", "prefix", tenantPrefix, "name", bkt.Name()) } else { level.Info(logger).Log("msg", "azdatalake sdk wrapper enabled", "name", bkt.Name()) } diff --git a/cmd/thanos/compact_test.go b/cmd/thanos/compact_test.go index a0868057d01..00755279fd7 100644 --- a/cmd/thanos/compact_test.go +++ b/cmd/thanos/compact_test.go @@ -12,102 +12,59 @@ import ( "gopkg.in/yaml.v2" ) -func TestAccessTenantPrefixes(t *testing.T) { +func TestMultiTenancyBucketConfigMarshaling(t *testing.T) { tests := []struct { name string - bucketConfig client.BucketConfig + config MultiTenancyBucketConfig expectedPrefixes []string - expectError bool }{ { - name: "bucket config with tenant_prefixes", - bucketConfig: client.BucketConfig{ + name: "empty tenant prefixes", + config: MultiTenancyBucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ - "directory": "/tmp/test", - "tenant_prefixes": []interface{}{"tenant1", "tenant2", "tenant3"}, + "directory": "/tmp/test", }, + Prefix: "", + TenantPrefixes: []string{}, }, - expectedPrefixes: []string{"tenant1", "tenant2", "tenant3"}, - expectError: false, + expectedPrefixes: []string{}, }, { - name: "bucket config without tenant_prefixes", - bucketConfig: client.BucketConfig{ + name: "single tenant prefix", + config: MultiTenancyBucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ "directory": "/tmp/test", }, + Prefix: "", + TenantPrefixes: []string{"tenant-a"}, }, - expectedPrefixes: nil, - expectError: false, + expectedPrefixes: []string{"tenant-a"}, }, { - name: "bucket config with empty tenant_prefixes", - bucketConfig: client.BucketConfig{ + name: "multiple tenant prefixes", + config: MultiTenancyBucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ - 
"directory": "/tmp/test", - "tenant_prefixes": []interface{}{}, + "directory": "/tmp/test", }, + Prefix: "", + TenantPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, }, - expectedPrefixes: []string{}, - expectError: false, + expectedPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, }, { - name: "bucket config with single tenant_prefix", - bucketConfig: client.BucketConfig{ + name: "with base prefix", + config: MultiTenancyBucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ - "directory": "/tmp/test", - "tenant_prefixes": []interface{}{"tenant-a"}, + "directory": "/tmp/test", }, + Prefix: "v1/raw/", + TenantPrefixes: []string{"tenant-a", "tenant-b"}, }, - expectedPrefixes: []string{"tenant-a"}, - expectError: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - prefixes, err := accessTenantPrefixes(tt.bucketConfig) - - if tt.expectError { - testutil.NotOk(t, err) - } else { - testutil.Ok(t, err) - testutil.Equals(t, tt.expectedPrefixes, prefixes) - } - }) - } -} - -func TestTenantBucketConfigMarshaling(t *testing.T) { - tests := []struct { - name string - config TenantBucketConfig - expectedPrefixes []string - }{ - { - name: "empty tenant prefixes", - config: TenantBucketConfig{ - TenantPrefixes: []string{}, - }, - expectedPrefixes: []string{}, - }, - { - name: "single tenant prefix", - config: TenantBucketConfig{ - TenantPrefixes: []string{"tenant-a"}, - }, - expectedPrefixes: []string{"tenant-a"}, - }, - { - name: "multiple tenant prefixes", - config: TenantBucketConfig{ - TenantPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, - }, - expectedPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, + expectedPrefixes: []string{"tenant-a", "tenant-b"}, }, } @@ -118,80 +75,110 @@ func TestTenantBucketConfigMarshaling(t *testing.T) { testutil.Ok(t, err) // Unmarshal back - var unmarshaledConfig TenantBucketConfig + var unmarshaledConfig MultiTenancyBucketConfig err = yaml.Unmarshal(yamlBytes, &unmarshaledConfig) testutil.Ok(t, err) // Verify prefixes are preserved testutil.Equals(t, tt.expectedPrefixes, unmarshaledConfig.TenantPrefixes) + testutil.Equals(t, tt.config.Prefix, unmarshaledConfig.Prefix) + testutil.Equals(t, tt.config.Type, unmarshaledConfig.Type) }) } } func TestTenantPrefixBucketCreation(t *testing.T) { tests := []struct { - name string - tenantPrefixes []string - expectedPrefixes []string + name string + multiTenancyConfig MultiTenancyBucketConfig + expectedPrefixes []string + expectedEffectivePrefixes []string }{ { - name: "single-tenant mode (no prefixes)", - tenantPrefixes: []string{}, - expectedPrefixes: []string{""}, + name: "single-tenant mode (no prefixes)", + multiTenancyConfig: MultiTenancyBucketConfig{ + Type: client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": "/tmp/test", + }, + Prefix: "", + TenantPrefixes: []string{}, + }, + expectedPrefixes: []string{""}, + expectedEffectivePrefixes: []string{""}, + }, + { + name: "multi-tenant mode with one tenant", + multiTenancyConfig: MultiTenancyBucketConfig{ + Type: client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": "/tmp/test", + }, + Prefix: "", + TenantPrefixes: []string{"tenant1"}, + }, + expectedPrefixes: []string{"tenant1"}, + expectedEffectivePrefixes: []string{"tenant1"}, }, { - name: "multi-tenant mode with one tenant", - tenantPrefixes: []string{"tenant1"}, - expectedPrefixes: []string{"v1/raw/tenant1"}, + name: "multi-tenant mode with multiple tenants", + multiTenancyConfig: MultiTenancyBucketConfig{ + Type: 
client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": "/tmp/test", + }, + Prefix: "", + TenantPrefixes: []string{"tenant1", "tenant2", "tenant3"}, + }, + expectedPrefixes: []string{"tenant1", "tenant2", "tenant3"}, + expectedEffectivePrefixes: []string{"tenant1", "tenant2", "tenant3"}, }, { - name: "multi-tenant mode with multiple tenants", - tenantPrefixes: []string{"tenant1", "tenant2", "tenant3"}, - expectedPrefixes: []string{"v1/raw/tenant1", "v1/raw/tenant2", "v1/raw/tenant3"}, + name: "multi-tenant mode with base prefix", + multiTenancyConfig: MultiTenancyBucketConfig{ + Type: client.FILESYSTEM, + Config: map[string]interface{}{ + "directory": "/tmp/test", + }, + Prefix: "v1/raw/", + TenantPrefixes: []string{"tenant1", "tenant2"}, + }, + expectedPrefixes: []string{"tenant1", "tenant2"}, + expectedEffectivePrefixes: []string{"v1/raw/tenant1", "v1/raw/tenant2"}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Create base bucket config - baseBucketConf := client.BucketConfig{ - Type: client.FILESYSTEM, - Config: map[string]interface{}{ - "directory": t.TempDir(), - }, - Prefix: "", + // Simulate what runCompact does + tenantPrefixes := tt.multiTenancyConfig.TenantPrefixes + if len(tenantPrefixes) == 0 { + tenantPrefixes = []string{""} } - // Add tenant_prefixes if provided - if len(tt.tenantPrefixes) > 0 { - configMap := baseBucketConf.Config.(map[string]interface{}) - configMap["tenant_prefixes"] = tt.tenantPrefixes - baseBucketConf.Config = configMap - } - - baseYaml, err := yaml.Marshal(baseBucketConf) - testutil.Ok(t, err) - - var bucketConf client.BucketConfig - err = yaml.Unmarshal(baseYaml, &bucketConf) - testutil.Ok(t, err) - - // Get tenant prefixes (simulating what runCompact does) - tenantPrefixes := []string{""} - extractedPrefixes, err := accessTenantPrefixes(bucketConf) - if err == nil && len(extractedPrefixes) > 0 { - tenantPrefixes = extractedPrefixes - } + testutil.Equals(t, tt.expectedPrefixes, tenantPrefixes) - // Simulate what the code does: create buckets for each tenant - var actualPrefixes []string + // Simulate bucket creation for each tenant + var actualEffectivePrefixes []string for _, tenantPrefix := range tenantPrefixes { - if tenantPrefix != "" { - bucketConf.Prefix = "v1/raw/" + tenantPrefix + bucketConf := &client.BucketConfig{ + Type: tt.multiTenancyConfig.Type, + Config: tt.multiTenancyConfig.Config, + Prefix: tt.multiTenancyConfig.Prefix + tenantPrefix, } - actualPrefixes = append(actualPrefixes, bucketConf.Prefix) + actualEffectivePrefixes = append(actualEffectivePrefixes, bucketConf.Prefix) // Verify the bucket can be created + // Use a real temp directory for filesystem buckets + if bucketConf.Type == client.FILESYSTEM { + configMap := make(map[string]interface{}) + for k, v := range bucketConf.Config.(map[string]interface{}) { + configMap[k] = v + } + configMap["directory"] = t.TempDir() + bucketConf.Config = configMap + } + tenantYaml, err := yaml.Marshal(bucketConf) testutil.Ok(t, err) @@ -200,7 +187,7 @@ func TestTenantPrefixBucketCreation(t *testing.T) { testutil.Ok(t, bkt.Close()) } - testutil.Equals(t, tt.expectedPrefixes, actualPrefixes) + testutil.Equals(t, tt.expectedEffectivePrefixes, actualEffectivePrefixes) }) } } @@ -255,25 +242,106 @@ func TestBucketConfigPrefixPreservation(t *testing.T) { } func TestTenantPrefixesFromYAML(t *testing.T) { - yamlConfig := ` + tests := []struct { + name string + yamlConfig string + expectedTenantPrefixes []string + expectedPrefix string + expectedType 
client.ObjProvider + }{ + { + name: "multi-tenant config without base prefix", + yamlConfig: ` type: FILESYSTEM config: directory: /tmp/test - tenant_prefixes: - - tenant-alpha - - tenant-beta - - tenant-gamma prefix: "" -` +tenant_prefixes: + - tenant-alpha + - tenant-beta + - tenant-gamma +`, + expectedTenantPrefixes: []string{"tenant-alpha", "tenant-beta", "tenant-gamma"}, + expectedPrefix: "", + expectedType: client.FILESYSTEM, + }, + { + name: "multi-tenant config with base prefix", + yamlConfig: ` +type: FILESYSTEM +config: + directory: /tmp/test +prefix: "v1/raw/" +tenant_prefixes: + - tenant-a + - tenant-b +`, + expectedTenantPrefixes: []string{"tenant-a", "tenant-b"}, + expectedPrefix: "v1/raw/", + expectedType: client.FILESYSTEM, + }, + { + name: "single-tenant config (no tenant_prefixes)", + yamlConfig: ` +type: FILESYSTEM +config: + directory: /tmp/test +prefix: "" +`, + expectedTenantPrefixes: nil, + expectedPrefix: "", + expectedType: client.FILESYSTEM, + }, + { + name: "S3 multi-tenant config", + yamlConfig: ` +type: S3 +config: + bucket: test-bucket + endpoint: s3.amazonaws.com +prefix: "data/" +tenant_prefixes: + - org1 + - org2 +`, + expectedTenantPrefixes: []string{"org1", "org2"}, + expectedPrefix: "data/", + expectedType: client.S3, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var multiTenancyConfig MultiTenancyBucketConfig + err := yaml.Unmarshal([]byte(tt.yamlConfig), &multiTenancyConfig) + testutil.Ok(t, err) + + // Verify all fields are correctly parsed + // For TenantPrefixes, both nil and empty slice are equivalent + if tt.expectedTenantPrefixes == nil && len(multiTenancyConfig.TenantPrefixes) == 0 { + // Both nil and [] are acceptable for "no tenant prefixes" + } else { + testutil.Equals(t, tt.expectedTenantPrefixes, multiTenancyConfig.TenantPrefixes) + } + testutil.Equals(t, tt.expectedPrefix, multiTenancyConfig.Prefix) + testutil.Equals(t, tt.expectedType, multiTenancyConfig.Type) - var bucketConf client.BucketConfig - err := yaml.Unmarshal([]byte(yamlConfig), &bucketConf) - testutil.Ok(t, err) + // Verify we can marshal back + yamlBytes, err := yaml.Marshal(multiTenancyConfig) + testutil.Ok(t, err) - // Extract tenant prefixes - tenantPrefixes, err := accessTenantPrefixes(bucketConf) - testutil.Ok(t, err) + // Verify we can unmarshal again and get the same result + var roundtripConfig MultiTenancyBucketConfig + err = yaml.Unmarshal(yamlBytes, &roundtripConfig) + testutil.Ok(t, err) - expected := []string{"tenant-alpha", "tenant-beta", "tenant-gamma"} - testutil.Equals(t, expected, tenantPrefixes) + // For roundtrip, verify lengths match (handles nil vs [] equivalence) + testutil.Equals(t, len(multiTenancyConfig.TenantPrefixes), len(roundtripConfig.TenantPrefixes)) + if len(multiTenancyConfig.TenantPrefixes) > 0 { + testutil.Equals(t, multiTenancyConfig.TenantPrefixes, roundtripConfig.TenantPrefixes) + } + testutil.Equals(t, multiTenancyConfig.Prefix, roundtripConfig.Prefix) + testutil.Equals(t, multiTenancyConfig.Type, roundtripConfig.Type) + }) + } } From aa20425c25ab4eabb748ee5aca74330477fda429 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Fri, 31 Oct 2025 20:11:16 +0000 Subject: [PATCH 04/35] Changed logging --- cmd/thanos/compact.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 5e093929548..8857c03c148 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -218,7 +218,7 @@ func runCompact( // Determine tenant prefixes to 
use (if provided) var multiTenancyBucketConfig MultiTenancyBucketConfig if err := yaml.Unmarshal(confContentYaml, &multiTenancyBucketConfig); err != nil { - return errors.Wrap(err, "parse bucket config") + return errors.Wrap(err, "failed to parse MultiTenancyBucketConfig") } var tenantPrefixes []string @@ -239,6 +239,7 @@ func runCompact( Config: multiTenancyBucketConfig.Config, Prefix: multiTenancyBucketConfig.Prefix + tenantPrefix, } + level.Info(logger).Log("msg", "starting compaction loop", "prefix", bucketConf.Prefix) tenantConfYaml, err := yaml.Marshal(bucketConf) if err != nil { From e4cf16b26fdf3cc3730289a159c3c0c7dd4604b4 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Fri, 31 Oct 2025 21:44:11 +0000 Subject: [PATCH 05/35] Changed logging again --- cmd/thanos/compact.go | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 8857c03c148..9e6c5245628 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -246,18 +246,10 @@ func runCompact( return errors.Wrap(err, "marshal tenant bucket config") } - // Create bucket for this tenant - if tenantPrefix != "" { - level.Info(logger).Log("msg", "creating compactor bucket with tenant prefix", "prefix", tenantPrefix) - } bkt, err := client.NewBucket(logger, tenantConfYaml, component.String(), nil) if conf.enableFolderDeletion { bkt, err = block.WrapWithAzDataLakeSdk(logger, tenantConfYaml, bkt) - if tenantPrefix != "" { - level.Info(logger).Log("msg", "azdatalake sdk wrapper enabled for tenant", "prefix", tenantPrefix, "name", bkt.Name()) - } else { - level.Info(logger).Log("msg", "azdatalake sdk wrapper enabled", "name", bkt.Name()) - } + level.Info(logger).Log("msg", "azdatalake sdk wrapper enabled", "prefix", bucketConf.Prefix, "name", bkt.Name()) } if err != nil { return err From 39a8f1864c68a46c91cca57d7ae87fd8e5518d30 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Sun, 2 Nov 2025 03:00:44 +0000 Subject: [PATCH 06/35] Fixed NewBucket() issues --- cmd/thanos/compact.go | 1 + cmd/thanos/downsample.go | 2 +- cmd/thanos/receive.go | 2 +- cmd/thanos/rule.go | 2 +- cmd/thanos/sidecar.go | 2 +- cmd/thanos/store.go | 2 +- cmd/thanos/tools_bucket.go | 47 ++++++++++++++++++++++++++++++-------- 7 files changed, 43 insertions(+), 15 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 9e6c5245628..b038a3faff7 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -210,6 +210,7 @@ func runCompact( srv.Shutdown(err) }) + // Note: We don't use getBucketConfigContentYaml here because we need to handle the case where the objStoreConfig is a MultiTenancyBucketConfig. 
confContentYaml, err := conf.objStore.Content() if err != nil { return err diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index 556369b0a1f..f72901650eb 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -79,7 +79,7 @@ func RunDownsample( comp component.Component, hashFunc metadata.HashFunc, ) error { - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index 4409972585b..d690e57c9f0 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -215,7 +215,7 @@ func runReceive( } var bkt objstore.Bucket - confContentYaml, err := conf.objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) if err != nil { return err } diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 41d996e0200..57ef8bf6b2c 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -835,7 +835,7 @@ func runRule( }) } - confContentYaml, err := conf.objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) if err != nil { return err } diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 127584ea947..9520a42407f 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -130,7 +130,7 @@ func runSidecar( client: promclient.NewWithTracingClient(logger, httpClient, "thanos-sidecar"), } - confContentYaml, err := conf.objStore.Content() + confContentYaml, err := getBucketConfigContentYaml(&conf.objStore) if err != nil { return errors.Wrap(err, "getting object store config") } diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 795c108cd3f..959cd75c1df 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -306,7 +306,7 @@ func runStore( srv.Shutdown(err) }) - confContentYaml, err := conf.objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(&conf.objStoreConfig) if err != nil { return err } diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index e0391af15b5..f531f5f4ecb 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -322,7 +322,7 @@ func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.Path "or compactor is ignoring the deletion because it's compacting the block at the same time."). 
Default("0s")) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -334,7 +334,7 @@ func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.Path insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") - backupconfContentYaml, err := objStoreBackupConfig.Content() + backupconfContentYaml, err := getBucketConfigContentYaml(objStoreBackupConfig) if err != nil { return err } @@ -406,7 +406,7 @@ func registerBucketLs(app extkingpin.AppClause, objStoreConfig *extflag.PathOrCo tbc.registerBucketLsFlag(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -514,7 +514,7 @@ func registerBucketInspect(app extkingpin.AppClause, objStoreConfig *extflag.Pat return errors.Wrap(err, "error parsing selector flag") } - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -624,7 +624,7 @@ func registerBucketWeb(app extkingpin.AppClause, objStoreConfig *extflag.PathOrC flagsMap := getFlagsMap(cmd.Flags()) - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -811,7 +811,7 @@ func registerBucketCleanup(app extkingpin.AppClause, objStoreConfig *extflag.Pat selectorRelabelConf := extkingpin.RegisterSelectorRelabelFlags(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -1079,7 +1079,7 @@ func registerBucketMarkBlock(app extkingpin.AppClause, objStoreConfig *extflag.P tbc.registerBucketMarkBlockFlag(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -1159,7 +1159,7 @@ func registerBucketRewrite(app extkingpin.AppClause, objStoreConfig *extflag.Pat toRelabel := extflag.RegisterPathOrContent(cmd, "rewrite.to-relabel-config", "YAML file that contains relabel configs that will be applied to blocks", extflag.WithEnvSubstitution()) provideChangeLog := cmd.Flag("rewrite.add-change-log", "If specified, all modifications are written to new block directory. 
Disable if latency is to high.").Default("true").Bool() cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -1357,7 +1357,7 @@ func registerBucketRetention(app extkingpin.AppClause, objStoreConfig *extflag.P level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) } - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -1457,7 +1457,7 @@ func registerBucketUploadBlocks(app extkingpin.AppClause, objStoreConfig *extfla return errors.Wrapf(err, "unable to access path '%s'", tbc.path) } - confContentYaml, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return errors.Wrap(err, "unable to parse objstore config") } @@ -1488,3 +1488,30 @@ func registerBucketUploadBlocks(app extkingpin.AppClause, objStoreConfig *extfla return nil }) } + +// getBucketConfigContentYaml converts the objStoreConfig from a MultiTenancyBucketConfig to a BucketConfig and marshals it to a YAML string. +// If the objStoreConfig is already a BucketConfig without the tenant_prefixes field, it effectively does not change anything. +func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, error) { + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return nil, err + } + + var multiTenancyBucketConfig MultiTenancyBucketConfig + if err := yaml.Unmarshal(confContentYamlWithTenantPrefixes, &multiTenancyBucketConfig); err != nil { + return nil, errors.Wrap(err, "failed to parse MultiTenancyBucketConfig when trying to convert to BucketConfig") + } + + bucketConf := &client.BucketConfig{ + Type: multiTenancyBucketConfig.Type, + Config: multiTenancyBucketConfig.Config, + Prefix: multiTenancyBucketConfig.Prefix, + } + + confContentYaml, err := yaml.Marshal(bucketConf) + if err != nil { + return nil, errors.Wrap(err, "failed to marshal BucketConfig when trying to convert from MultiTenancyBucketConfig") + } + + return confContentYaml, nil +} From fd8b9ee6a34484724aa0921e9ed24040b797c8e4 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Sun, 2 Nov 2025 04:03:31 +0000 Subject: [PATCH 07/35] Changed function signature --- cmd/thanos/downsample.go | 6 +++- cmd/thanos/receive.go | 6 +++- cmd/thanos/rule.go | 6 +++- cmd/thanos/sidecar.go | 6 +++- cmd/thanos/store.go | 7 +++- cmd/thanos/tools_bucket.go | 67 +++++++++++++++++++++++++++++--------- 6 files changed, 77 insertions(+), 21 deletions(-) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index f72901650eb..f2fdafd7c2a 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -79,7 +79,11 @@ func RunDownsample( comp component.Component, hashFunc metadata.HashFunc, ) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index d690e57c9f0..8107157a5c4 100644 --- a/cmd/thanos/receive.go +++ 
b/cmd/thanos/receive.go @@ -215,7 +215,11 @@ func runReceive( } var bkt objstore.Bucket - confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) + confContentYamlWithTenantPrefixes, err := conf.objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 57ef8bf6b2c..2a83867be1e 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -835,7 +835,11 @@ func runRule( }) } - confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) + confContentYamlWithTenantPrefixes, err := conf.objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 9520a42407f..4aa8c7daa0c 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -130,10 +130,14 @@ func runSidecar( client: promclient.NewWithTracingClient(logger, httpClient, "thanos-sidecar"), } - confContentYaml, err := getBucketConfigContentYaml(&conf.objStore) + confContentYamlWithTenantPrefixes, err := conf.objStore.Content() if err != nil { return errors.Wrap(err, "getting object store config") } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + if err != nil { + return errors.Wrap(err, "converting object store config to BucketConfig") + } var uploads = len(confContentYaml) != 0 if !uploads { diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 959cd75c1df..438b089ebfd 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -306,10 +306,15 @@ func runStore( srv.Shutdown(err) }) - confContentYaml, err := getBucketConfigContentYaml(&conf.objStoreConfig) + confContentYamlWithTenantPrefixes, err := conf.objStoreConfig.Content() if err != nil { return err } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + if err != nil { + return err + } + customBktConfig := exthttp.DefaultCustomBucketConfig() if err := yaml.Unmarshal(confContentYaml, &customBktConfig); err != nil { return errors.Wrap(err, "parsing config YAML file") diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index f531f5f4ecb..85d4a1a9bdc 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -322,7 +322,11 @@ func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.Path "or compactor is ignoring the deletion because it's compacting the block at the same time."). 
Default("0s")) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } @@ -334,7 +338,11 @@ func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.Path insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") - backupconfContentYaml, err := getBucketConfigContentYaml(objStoreBackupConfig) + backupconfContentYamlWithTenantPrefixes, err := objStoreBackupConfig.Content() + if err != nil { + return err + } + backupconfContentYaml, err := getBucketConfigContentYaml(backupconfContentYamlWithTenantPrefixes) if err != nil { return err } @@ -406,7 +414,11 @@ func registerBucketLs(app extkingpin.AppClause, objStoreConfig *extflag.PathOrCo tbc.registerBucketLsFlag(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } @@ -514,7 +526,11 @@ func registerBucketInspect(app extkingpin.AppClause, objStoreConfig *extflag.Pat return errors.Wrap(err, "error parsing selector flag") } - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } @@ -624,7 +640,11 @@ func registerBucketWeb(app extkingpin.AppClause, objStoreConfig *extflag.PathOrC flagsMap := getFlagsMap(cmd.Flags()) - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } @@ -811,7 +831,11 @@ func registerBucketCleanup(app extkingpin.AppClause, objStoreConfig *extflag.Pat selectorRelabelConf := extkingpin.RegisterSelectorRelabelFlags(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } @@ -1079,7 +1103,11 @@ func registerBucketMarkBlock(app extkingpin.AppClause, objStoreConfig *extflag.P tbc.registerBucketMarkBlockFlag(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err 
:= getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } @@ -1159,7 +1187,11 @@ func registerBucketRewrite(app extkingpin.AppClause, objStoreConfig *extflag.Pat toRelabel := extflag.RegisterPathOrContent(cmd, "rewrite.to-relabel-config", "YAML file that contains relabel configs that will be applied to blocks", extflag.WithEnvSubstitution()) provideChangeLog := cmd.Flag("rewrite.add-change-log", "If specified, all modifications are written to new block directory. Disable if latency is to high.").Default("true").Bool() cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } @@ -1357,7 +1389,11 @@ func registerBucketRetention(app extkingpin.AppClause, objStoreConfig *extflag.P level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) } - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return err + } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) if err != nil { return err } @@ -1457,10 +1493,14 @@ func registerBucketUploadBlocks(app extkingpin.AppClause, objStoreConfig *extfla return errors.Wrapf(err, "unable to access path '%s'", tbc.path) } - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() if err != nil { return errors.Wrap(err, "unable to parse objstore config") } + confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + if err != nil { + return err + } bkt, err := client.NewBucket(logger, confContentYaml, component.Upload.String(), nil) if err != nil { @@ -1491,12 +1531,7 @@ func registerBucketUploadBlocks(app extkingpin.AppClause, objStoreConfig *extfla // getBucketConfigContentYaml converts the objStoreConfig from a MultiTenancyBucketConfig to a BucketConfig and marshals it to a YAML string. // If the objStoreConfig is already a BucketConfig without the tenant_prefixes field, it effectively does not change anything. -func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, error) { - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return nil, err - } - +func getBucketConfigContentYaml(confContentYamlWithTenantPrefixes []byte) ([]byte, error) { var multiTenancyBucketConfig MultiTenancyBucketConfig if err := yaml.Unmarshal(confContentYamlWithTenantPrefixes, &multiTenancyBucketConfig); err != nil { return nil, errors.Wrap(err, "failed to parse MultiTenancyBucketConfig when trying to convert to BucketConfig") From d5184bc06a81ba67e10b9d7ff3130687674ba6e8 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Sun, 2 Nov 2025 04:19:31 +0000 Subject: [PATCH 08/35] Revert "Changed function signature" This reverts commit fd8b9ee6a34484724aa0921e9ed24040b797c8e4. 
--- cmd/thanos/downsample.go | 6 +--- cmd/thanos/receive.go | 6 +--- cmd/thanos/rule.go | 6 +--- cmd/thanos/sidecar.go | 6 +--- cmd/thanos/store.go | 7 +--- cmd/thanos/tools_bucket.go | 67 +++++++++----------------------------- 6 files changed, 21 insertions(+), 77 deletions(-) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index f2fdafd7c2a..f72901650eb 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -79,11 +79,7 @@ func RunDownsample( comp component.Component, hashFunc metadata.HashFunc, ) error { - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index 8107157a5c4..d690e57c9f0 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -215,11 +215,7 @@ func runReceive( } var bkt objstore.Bucket - confContentYamlWithTenantPrefixes, err := conf.objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) if err != nil { return err } diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 2a83867be1e..57ef8bf6b2c 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -835,11 +835,7 @@ func runRule( }) } - confContentYamlWithTenantPrefixes, err := conf.objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) if err != nil { return err } diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 4aa8c7daa0c..9520a42407f 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -130,14 +130,10 @@ func runSidecar( client: promclient.NewWithTracingClient(logger, httpClient, "thanos-sidecar"), } - confContentYamlWithTenantPrefixes, err := conf.objStore.Content() + confContentYaml, err := getBucketConfigContentYaml(&conf.objStore) if err != nil { return errors.Wrap(err, "getting object store config") } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) - if err != nil { - return errors.Wrap(err, "converting object store config to BucketConfig") - } var uploads = len(confContentYaml) != 0 if !uploads { diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 438b089ebfd..959cd75c1df 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -306,15 +306,10 @@ func runStore( srv.Shutdown(err) }) - confContentYamlWithTenantPrefixes, err := conf.objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(&conf.objStoreConfig) if err != nil { return err } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) - if err != nil { - return err - } - customBktConfig := exthttp.DefaultCustomBucketConfig() if err := yaml.Unmarshal(confContentYaml, &customBktConfig); err != nil { return errors.Wrap(err, "parsing config YAML file") diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index 85d4a1a9bdc..f531f5f4ecb 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -322,11 +322,7 @@ func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.Path "or compactor is ignoring the 
deletion because it's compacting the block at the same time."). Default("0s")) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -338,11 +334,7 @@ func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.Path insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") - backupconfContentYamlWithTenantPrefixes, err := objStoreBackupConfig.Content() - if err != nil { - return err - } - backupconfContentYaml, err := getBucketConfigContentYaml(backupconfContentYamlWithTenantPrefixes) + backupconfContentYaml, err := getBucketConfigContentYaml(objStoreBackupConfig) if err != nil { return err } @@ -414,11 +406,7 @@ func registerBucketLs(app extkingpin.AppClause, objStoreConfig *extflag.PathOrCo tbc.registerBucketLsFlag(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -526,11 +514,7 @@ func registerBucketInspect(app extkingpin.AppClause, objStoreConfig *extflag.Pat return errors.Wrap(err, "error parsing selector flag") } - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -640,11 +624,7 @@ func registerBucketWeb(app extkingpin.AppClause, objStoreConfig *extflag.PathOrC flagsMap := getFlagsMap(cmd.Flags()) - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -831,11 +811,7 @@ func registerBucketCleanup(app extkingpin.AppClause, objStoreConfig *extflag.Pat selectorRelabelConf := extkingpin.RegisterSelectorRelabelFlags(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -1103,11 +1079,7 @@ func registerBucketMarkBlock(app extkingpin.AppClause, objStoreConfig *extflag.P tbc.registerBucketMarkBlockFlag(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := 
getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -1187,11 +1159,7 @@ func registerBucketRewrite(app extkingpin.AppClause, objStoreConfig *extflag.Pat toRelabel := extflag.RegisterPathOrContent(cmd, "rewrite.to-relabel-config", "YAML file that contains relabel configs that will be applied to blocks", extflag.WithEnvSubstitution()) provideChangeLog := cmd.Flag("rewrite.add-change-log", "If specified, all modifications are written to new block directory. Disable if latency is to high.").Default("true").Bool() cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -1389,11 +1357,7 @@ func registerBucketRetention(app extkingpin.AppClause, objStoreConfig *extflag.P level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) } - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return err - } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err } @@ -1493,14 +1457,10 @@ func registerBucketUploadBlocks(app extkingpin.AppClause, objStoreConfig *extfla return errors.Wrapf(err, "unable to access path '%s'", tbc.path) } - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return errors.Wrap(err, "unable to parse objstore config") } - confContentYaml, err := getBucketConfigContentYaml(confContentYamlWithTenantPrefixes) - if err != nil { - return err - } bkt, err := client.NewBucket(logger, confContentYaml, component.Upload.String(), nil) if err != nil { @@ -1531,7 +1491,12 @@ func registerBucketUploadBlocks(app extkingpin.AppClause, objStoreConfig *extfla // getBucketConfigContentYaml converts the objStoreConfig from a MultiTenancyBucketConfig to a BucketConfig and marshals it to a YAML string. // If the objStoreConfig is already a BucketConfig without the tenant_prefixes field, it effectively does not change anything. 
-func getBucketConfigContentYaml(confContentYamlWithTenantPrefixes []byte) ([]byte, error) { +func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, error) { + confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() + if err != nil { + return nil, err + } + var multiTenancyBucketConfig MultiTenancyBucketConfig if err := yaml.Unmarshal(confContentYamlWithTenantPrefixes, &multiTenancyBucketConfig); err != nil { return nil, errors.Wrap(err, "failed to parse MultiTenancyBucketConfig when trying to convert to BucketConfig") From 4459d513687881a77df4a971975ae42342edae95 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Sun, 2 Nov 2025 04:22:00 +0000 Subject: [PATCH 09/35] Fixed initialization error --- cmd/thanos/tools_bucket.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index f531f5f4ecb..005494a9924 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -1497,8 +1497,8 @@ func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, return nil, err } - var multiTenancyBucketConfig MultiTenancyBucketConfig - if err := yaml.Unmarshal(confContentYamlWithTenantPrefixes, &multiTenancyBucketConfig); err != nil { + multiTenancyBucketConfig := &MultiTenancyBucketConfig{} + if err := yaml.Unmarshal(confContentYamlWithTenantPrefixes, multiTenancyBucketConfig); err != nil { return nil, errors.Wrap(err, "failed to parse MultiTenancyBucketConfig when trying to convert to BucketConfig") } From eaf92a88a8dbb95d4d33a515420baa4cb9a0b0df Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Sun, 2 Nov 2025 04:49:14 +0000 Subject: [PATCH 10/35] Debug messages --- cmd/thanos/tools_bucket.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index 005494a9924..9722ce00a5a 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -1497,11 +1497,16 @@ func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, return nil, err } + // DEBUG: Log input + fmt.Fprintf(os.Stderr, "DEBUG: getBucketConfigContentYaml input: %s\n", string(confContentYamlWithTenantPrefixes)) + multiTenancyBucketConfig := &MultiTenancyBucketConfig{} if err := yaml.Unmarshal(confContentYamlWithTenantPrefixes, multiTenancyBucketConfig); err != nil { return nil, errors.Wrap(err, "failed to parse MultiTenancyBucketConfig when trying to convert to BucketConfig") } + fmt.Fprintf(os.Stderr, "DEBUG getBucketConfigContentYaml parsed Type: '%s'\n", multiTenancyBucketConfig.Type) + bucketConf := &client.BucketConfig{ Type: multiTenancyBucketConfig.Type, Config: multiTenancyBucketConfig.Config, @@ -1513,5 +1518,7 @@ func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, return nil, errors.Wrap(err, "failed to marshal BucketConfig when trying to convert from MultiTenancyBucketConfig") } + fmt.Fprintf(os.Stderr, "DEBUG getBucketConfigContentYaml output: %s\n", string(confContentYaml)) + return confContentYaml, nil } From d0bd9b3a269ddabe5f2ca01de6a855f5a664ead1 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Sun, 2 Nov 2025 06:29:42 +0000 Subject: [PATCH 11/35] Logging input for debugging --- cmd/thanos/downsample.go | 5 +++++ cmd/thanos/receive.go | 5 +++++ cmd/thanos/rule.go | 5 +++++ cmd/thanos/sidecar.go | 5 +++++ cmd/thanos/store.go | 5 +++++ 5 files changed, 25 insertions(+) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go 
index f72901650eb..023174ec0b3 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -79,6 +79,11 @@ func RunDownsample( comp component.Component, hashFunc metadata.HashFunc, ) error { + content, err := objStoreConfig.Content() + if err != nil { + return errors.Wrap(err, "getting object store config") + } + level.Info(logger).Log("msg", "conf.objStore", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index d690e57c9f0..4be80db0cd4 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -215,6 +215,11 @@ func runReceive( } var bkt objstore.Bucket + content, err := conf.objStoreConfig.Content() + if err != nil { + return errors.Wrap(err, "getting object store config") + } + level.Info(logger).Log("msg", "conf.objStoreConfig", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) if err != nil { return err diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 57ef8bf6b2c..7aabe2838e9 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -835,6 +835,11 @@ func runRule( }) } + content, err := conf.objStoreConfig.Content() + if err != nil { + return errors.Wrap(err, "getting object store config") + } + level.Info(logger).Log("msg", "conf.objStoreConfig", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) if err != nil { return err diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 9520a42407f..849f8958138 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -130,6 +130,11 @@ func runSidecar( client: promclient.NewWithTracingClient(logger, httpClient, "thanos-sidecar"), } + content, err := conf.objStore.Content() + if err != nil { + return errors.Wrap(err, "getting object store config") + } + level.Info(logger).Log("msg", "conf.objStore", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(&conf.objStore) if err != nil { return errors.Wrap(err, "getting object store config") diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 959cd75c1df..e475bb46ddb 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -306,6 +306,11 @@ func runStore( srv.Shutdown(err) }) + content, err := conf.objStoreConfig.Content() + if err != nil { + return errors.Wrap(err, "getting object store config") + } + level.Info(logger).Log("msg", "conf.objStoreConfig", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(&conf.objStoreConfig) if err != nil { return err From 9ef964bbaac5d3f4e51506dec443300becb43635 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Sun, 2 Nov 2025 22:36:25 +0000 Subject: [PATCH 12/35] Handle empty object store config case --- cmd/thanos/tools_bucket.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index 9722ce00a5a..35ddbbe6c5e 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -1497,6 +1497,12 @@ func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, return nil, err } + // If the config is empty, return empty immediately (for optional configs like sidecar) + if len(confContentYamlWithTenantPrefixes) == 0 { + fmt.Fprintf(os.Stderr, "DEBUG getBucketConfigContentYaml input is empty, returning empty\n") + return []byte{}, nil + } + // DEBUG: Log input fmt.Fprintf(os.Stderr, "DEBUG: getBucketConfigContentYaml 
input: %s\n", string(confContentYamlWithTenantPrefixes)) @@ -1505,6 +1511,12 @@ func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, return nil, errors.Wrap(err, "failed to parse MultiTenancyBucketConfig when trying to convert to BucketConfig") } + // Additional safety check: if Type is empty after unmarshalling, return empty immediately + if multiTenancyBucketConfig.Type == "" { + fmt.Fprintf(os.Stderr, "DEBUG getBucketConfigContentYaml Type is empty, returning empty after unmarshalling\n") + return []byte{}, nil + } + fmt.Fprintf(os.Stderr, "DEBUG getBucketConfigContentYaml parsed Type: '%s'\n", multiTenancyBucketConfig.Type) bucketConf := &client.BucketConfig{ From 5370ee898ba01e96fa56479bf8af45ffb6be018c Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Sun, 2 Nov 2025 22:47:11 +0000 Subject: [PATCH 13/35] Removed debug messages --- cmd/thanos/downsample.go | 5 ----- cmd/thanos/receive.go | 5 ----- cmd/thanos/rule.go | 5 ----- cmd/thanos/sidecar.go | 5 ----- cmd/thanos/store.go | 5 ----- cmd/thanos/tools_bucket.go | 14 -------------- 6 files changed, 39 deletions(-) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index 023174ec0b3..f72901650eb 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -79,11 +79,6 @@ func RunDownsample( comp component.Component, hashFunc metadata.HashFunc, ) error { - content, err := objStoreConfig.Content() - if err != nil { - return errors.Wrap(err, "getting object store config") - } - level.Info(logger).Log("msg", "conf.objStore", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) if err != nil { return err diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index 4be80db0cd4..d690e57c9f0 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -215,11 +215,6 @@ func runReceive( } var bkt objstore.Bucket - content, err := conf.objStoreConfig.Content() - if err != nil { - return errors.Wrap(err, "getting object store config") - } - level.Info(logger).Log("msg", "conf.objStoreConfig", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) if err != nil { return err diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 7aabe2838e9..57ef8bf6b2c 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -835,11 +835,6 @@ func runRule( }) } - content, err := conf.objStoreConfig.Content() - if err != nil { - return errors.Wrap(err, "getting object store config") - } - level.Info(logger).Log("msg", "conf.objStoreConfig", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) if err != nil { return err diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 849f8958138..9520a42407f 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -130,11 +130,6 @@ func runSidecar( client: promclient.NewWithTracingClient(logger, httpClient, "thanos-sidecar"), } - content, err := conf.objStore.Content() - if err != nil { - return errors.Wrap(err, "getting object store config") - } - level.Info(logger).Log("msg", "conf.objStore", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(&conf.objStore) if err != nil { return errors.Wrap(err, "getting object store config") diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index e475bb46ddb..959cd75c1df 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -306,11 +306,6 @@ func runStore( srv.Shutdown(err) }) - content, err := 
conf.objStoreConfig.Content() - if err != nil { - return errors.Wrap(err, "getting object store config") - } - level.Info(logger).Log("msg", "conf.objStoreConfig", "content", string(content)) confContentYaml, err := getBucketConfigContentYaml(&conf.objStoreConfig) if err != nil { return err diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index 35ddbbe6c5e..e36825d5b43 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -1499,26 +1499,14 @@ func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, // If the config is empty, return empty immediately (for optional configs like sidecar) if len(confContentYamlWithTenantPrefixes) == 0 { - fmt.Fprintf(os.Stderr, "DEBUG getBucketConfigContentYaml input is empty, returning empty\n") return []byte{}, nil } - // DEBUG: Log input - fmt.Fprintf(os.Stderr, "DEBUG: getBucketConfigContentYaml input: %s\n", string(confContentYamlWithTenantPrefixes)) - multiTenancyBucketConfig := &MultiTenancyBucketConfig{} if err := yaml.Unmarshal(confContentYamlWithTenantPrefixes, multiTenancyBucketConfig); err != nil { return nil, errors.Wrap(err, "failed to parse MultiTenancyBucketConfig when trying to convert to BucketConfig") } - // Additional safety check: if Type is empty after unmarshalling, return empty immediately - if multiTenancyBucketConfig.Type == "" { - fmt.Fprintf(os.Stderr, "DEBUG getBucketConfigContentYaml Type is empty, returning empty after unmarshalling\n") - return []byte{}, nil - } - - fmt.Fprintf(os.Stderr, "DEBUG getBucketConfigContentYaml parsed Type: '%s'\n", multiTenancyBucketConfig.Type) - bucketConf := &client.BucketConfig{ Type: multiTenancyBucketConfig.Type, Config: multiTenancyBucketConfig.Config, @@ -1530,7 +1518,5 @@ func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, return nil, errors.Wrap(err, "failed to marshal BucketConfig when trying to convert from MultiTenancyBucketConfig") } - fmt.Fprintf(os.Stderr, "DEBUG getBucketConfigContentYaml output: %s\n", string(confContentYaml)) - return confContentYaml, nil } From a451954c5d06cfa4827104088edc2e4519b9a4bc Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 19:02:55 +0000 Subject: [PATCH 14/35] Fixed registries --- cmd/thanos/compact.go | 23 +++++++++++++++-------- cmd/thanos/compact_test.go | 3 ++- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index b038a3faff7..291b10b3134 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -238,7 +238,7 @@ func runCompact( bucketConf := &client.BucketConfig{ Type: multiTenancyBucketConfig.Type, Config: multiTenancyBucketConfig.Config, - Prefix: multiTenancyBucketConfig.Prefix + tenantPrefix, + Prefix: path.Join(multiTenancyBucketConfig.Prefix, tenantPrefix), } level.Info(logger).Log("msg", "starting compaction loop", "prefix", bucketConf.Prefix) @@ -256,7 +256,14 @@ func runCompact( return err } - insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) + var tenantReg prometheus.Registerer + if tenantPrefix != "" { + // For multi-tenant mode, add tenant label to avoid metric collisions + tenantReg = prometheus.WrapRegistererWith(prometheus.Labels{"tenant": tenantPrefix}, reg) + } else { + tenantReg = reg + } + insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", tenantReg), bkt.Name())) // Create tenant-specific logger 
tenantLogger := logger @@ -289,7 +296,7 @@ func runCompact( noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig) - consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, extprom.WrapRegistererWithPrefix("thanos_", reg)) + consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, extprom.WrapRegistererWithPrefix("thanos_", tenantReg)) timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) var blockLister block.Lister @@ -301,7 +308,7 @@ func runCompact( default: return errors.Errorf("unknown sync strategy %s", conf.blockListStrategy) } - baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) + baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", tenantReg)) if err != nil { return errors.Wrap(err, "create meta fetcher") } @@ -338,7 +345,7 @@ func runCompact( } // Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability. cf := baseMetaFetcher.NewMetaFetcher( - extprom.WrapRegistererWithPrefix("thanos_", reg), filters) + extprom.WrapRegistererWithPrefix("thanos_", tenantReg), filters) cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetLoaded(blocks, err) }) @@ -352,7 +359,7 @@ func runCompact( } sy, err = compact.NewMetaSyncer( logger, - reg, + tenantReg, insBkt, cf, duplicateBlocksFilter, @@ -402,7 +409,7 @@ func runCompact( // Instantiate the compactor with different time slices. Timestamps in TSDB // are in milliseconds. 
- comp, err := tsdb.NewLeveledCompactor(ctx, reg, tenantLogger, levels, downsample.NewPool(), mergeFunc) + comp, err := tsdb.NewLeveledCompactor(ctx, tenantReg, tenantLogger, levels, downsample.NewPool(), mergeFunc) if err != nil { return errors.Wrap(err, "create compactor") } @@ -425,7 +432,7 @@ func runCompact( insBkt, conf.acceptMalformedIndex, enableVerticalCompaction, - reg, + tenantReg, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), compactMetrics.garbageCollectedBlocks, compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason), diff --git a/cmd/thanos/compact_test.go b/cmd/thanos/compact_test.go index 00755279fd7..ab080885331 100644 --- a/cmd/thanos/compact_test.go +++ b/cmd/thanos/compact_test.go @@ -4,6 +4,7 @@ package main import ( + "path" "testing" "github.com/efficientgo/core/testutil" @@ -164,7 +165,7 @@ func TestTenantPrefixBucketCreation(t *testing.T) { bucketConf := &client.BucketConfig{ Type: tt.multiTenancyConfig.Type, Config: tt.multiTenancyConfig.Config, - Prefix: tt.multiTenancyConfig.Prefix + tenantPrefix, + Prefix: path.Join(tt.multiTenancyConfig.Prefix, tenantPrefix), } actualEffectivePrefixes = append(actualEffectivePrefixes, bucketConf.Prefix) From 753e560641687c187f84bdabbd8d6d7f71ca95f6 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 19:22:29 +0000 Subject: [PATCH 15/35] Changed tenant reg label to tenant_prefix --- cmd/thanos/compact.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 291b10b3134..a3dcae9c3d7 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -259,7 +259,7 @@ func runCompact( var tenantReg prometheus.Registerer if tenantPrefix != "" { // For multi-tenant mode, add tenant label to avoid metric collisions - tenantReg = prometheus.WrapRegistererWith(prometheus.Labels{"tenant": tenantPrefix}, reg) + tenantReg = prometheus.WrapRegistererWith(prometheus.Labels{"tenant_prefix": tenantPrefix}, reg) } else { tenantReg = reg } From 24b3bc2291c7e73393bb34ba4402ba8d2f5d88cf Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 19:39:27 +0000 Subject: [PATCH 16/35] Fixed reg vs tenantReg --- cmd/thanos/compact.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index a3dcae9c3d7..bf3c4b84b07 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -345,7 +345,7 @@ func runCompact( } // Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability. 
cf := baseMetaFetcher.NewMetaFetcher( - extprom.WrapRegistererWithPrefix("thanos_", tenantReg), filters) + extprom.WrapRegistererWithPrefix("thanos_", reg), filters) cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetLoaded(blocks, err) }) From d7b40b6ee9d93a7281e822b5d5bdcca7e34b3cee Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 19:58:55 +0000 Subject: [PATCH 17/35] Fixed web UI duplicate conflicts --- cmd/thanos/compact.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index bf3c4b84b07..908f0eeb3f3 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -231,6 +231,8 @@ func runCompact( level.Info(logger).Log("msg", "tenant prefixes found, running in multi-tenant mode", "prefixes", strings.Join(tenantPrefixes, ",")) } + overlappingCallback := compact.NewOverlappingCompactionLifecycleCallback(reg, logger, conf.enableOverlappingRemoval) + // Start compaction for each tenant // Each will get its own bucket created via client.NewBucket with the appropriate prefix for _, tenantPrefix := range tenantPrefixes { @@ -462,7 +464,7 @@ func runCompact( planner, comp, compact.DefaultBlockDeletableChecker{}, - compact.NewOverlappingCompactionLifecycleCallback(reg, tenantLogger, conf.enableOverlappingRemoval), + overlappingCallback, compactDir, insBkt, conf.compactionConcurrency, @@ -674,7 +676,10 @@ func runCompact( }) if conf.wait { - if !conf.disableWeb { + isFirstTenant := (tenantPrefix == tenantPrefixes[0]) + + // Only set up web UI and progress for the first tenant + if isFirstTenant && !conf.disableWeb { r := route.New() ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) @@ -743,7 +748,7 @@ func runCompact( } // Periodically calculate the progress of compaction, downsampling and retention. 
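// NOTE (illustrative, not part of the patch): the bucket web UI routes and the
// progress calculators below are per-process rather than per-tenant, so this change
// gates them on isFirstTenant; running them on every iteration of the tenant loop
// would try to set up the same HTTP handlers and collectors more than once.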
- if conf.progressCalculateInterval > 0 { + if isFirstTenant && conf.progressCalculateInterval > 0 { g.Add(func() error { ps := compact.NewCompactionProgressCalculator(reg, tsdbPlanner) rs := compact.NewRetentionProgressCalculator(reg, retentionByResolution) From 08244252b1907258c4362a654ac6c618e620a495 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 20:16:20 +0000 Subject: [PATCH 18/35] Removed registerer from most tenant use cases --- cmd/thanos/compact.go | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 908f0eeb3f3..1725851332b 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -298,7 +298,12 @@ func runCompact( noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig) - consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, extprom.WrapRegistererWithPrefix("thanos_", tenantReg)) + var consistencyDelayMetaFilter *block.ConsistencyDelayMetaFilter + if tenantPrefix != "" { + consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, nil) // TODO (willh-db): revisit metrics here + } else { + consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", reg))) + } timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) var blockLister block.Lister @@ -310,7 +315,12 @@ func runCompact( default: return errors.Errorf("unknown sync strategy %s", conf.blockListStrategy) } - baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", tenantReg)) + var baseMetaFetcher *block.BaseFetcher + if tenantPrefix != "" { + baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, nil) // TODO (willh-db): revisit metrics here + } else { + baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) + } if err != nil { return errors.Wrap(err, "create meta fetcher") } @@ -359,9 +369,15 @@ func runCompact( if !conf.wait { syncMetasTimeout = 0 } + var metaSyncerReg prometheus.Registerer + if tenantPrefix != "" { + metaSyncerReg = nil // TODO (willh-db): revisit metrics here + } else { + metaSyncerReg = reg + } sy, err = compact.NewMetaSyncer( logger, - tenantReg, + metaSyncerReg, insBkt, cf, duplicateBlocksFilter, @@ -411,7 +427,12 @@ func runCompact( // Instantiate the compactor with different time slices. Timestamps in TSDB // are in milliseconds. 
- comp, err := tsdb.NewLeveledCompactor(ctx, tenantReg, tenantLogger, levels, downsample.NewPool(), mergeFunc) + var comp *tsdb.LeveledCompactor + if tenantPrefix != "" { + comp, err = tsdb.NewLeveledCompactor(ctx, nil, tenantLogger, levels, downsample.NewPool(), mergeFunc) // TODO (willh-db): revisit metrics here + } else { + comp, err = tsdb.NewLeveledCompactor(ctx, reg, tenantLogger, levels, downsample.NewPool(), mergeFunc) + } if err != nil { return errors.Wrap(err, "create compactor") } @@ -429,12 +450,18 @@ func runCompact( return errors.Wrap(err, "create working downsample directory") } + var grouperReg prometheus.Registerer + if tenantPrefix != "" { + grouperReg = nil // TODO (willh-db): revisit metrics here + } else { + grouperReg = reg + } grouper := compact.NewDefaultGrouper( logger, insBkt, conf.acceptMalformedIndex, enableVerticalCompaction, - tenantReg, + grouperReg, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), compactMetrics.garbageCollectedBlocks, compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason), From 912c4891217cb523c4a723859416592fa014f297 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 20:40:03 +0000 Subject: [PATCH 19/35] Set reg to nil for multitenant mode --- cmd/thanos/compact.go | 47 +++++++++---------------------------------- 1 file changed, 9 insertions(+), 38 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 1725851332b..960ca7737ec 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -258,14 +258,12 @@ func runCompact( return err } - var tenantReg prometheus.Registerer if tenantPrefix != "" { - // For multi-tenant mode, add tenant label to avoid metric collisions - tenantReg = prometheus.WrapRegistererWith(prometheus.Labels{"tenant_prefix": tenantPrefix}, reg) - } else { - tenantReg = reg + // For multi-tenant mode, we pass a nil registerer to avoid metric collisions + // TODO (willh-db): revisit metrics structure for multi-tenant mode + reg = nil } - insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", tenantReg), bkt.Name())) + insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) // Create tenant-specific logger tenantLogger := logger @@ -298,12 +296,7 @@ func runCompact( noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig) - var consistencyDelayMetaFilter *block.ConsistencyDelayMetaFilter - if tenantPrefix != "" { - consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, nil) // TODO (willh-db): revisit metrics here - } else { - consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", reg))) - } + consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", reg))) timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) var blockLister block.Lister @@ -315,12 +308,7 @@ func runCompact( default: return errors.Errorf("unknown sync strategy %s", 
conf.blockListStrategy) } - var baseMetaFetcher *block.BaseFetcher - if tenantPrefix != "" { - baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, nil) // TODO (willh-db): revisit metrics here - } else { - baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) - } + baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) if err != nil { return errors.Wrap(err, "create meta fetcher") } @@ -369,15 +357,9 @@ func runCompact( if !conf.wait { syncMetasTimeout = 0 } - var metaSyncerReg prometheus.Registerer - if tenantPrefix != "" { - metaSyncerReg = nil // TODO (willh-db): revisit metrics here - } else { - metaSyncerReg = reg - } sy, err = compact.NewMetaSyncer( logger, - metaSyncerReg, + reg, insBkt, cf, duplicateBlocksFilter, @@ -427,12 +409,7 @@ func runCompact( // Instantiate the compactor with different time slices. Timestamps in TSDB // are in milliseconds. - var comp *tsdb.LeveledCompactor - if tenantPrefix != "" { - comp, err = tsdb.NewLeveledCompactor(ctx, nil, tenantLogger, levels, downsample.NewPool(), mergeFunc) // TODO (willh-db): revisit metrics here - } else { - comp, err = tsdb.NewLeveledCompactor(ctx, reg, tenantLogger, levels, downsample.NewPool(), mergeFunc) - } + comp, err := tsdb.NewLeveledCompactor(ctx, reg, tenantLogger, levels, downsample.NewPool(), mergeFunc) if err != nil { return errors.Wrap(err, "create compactor") } @@ -450,18 +427,12 @@ func runCompact( return errors.Wrap(err, "create working downsample directory") } - var grouperReg prometheus.Registerer - if tenantPrefix != "" { - grouperReg = nil // TODO (willh-db): revisit metrics here - } else { - grouperReg = reg - } grouper := compact.NewDefaultGrouper( logger, insBkt, conf.acceptMalformedIndex, enableVerticalCompaction, - grouperReg, + reg, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), compactMetrics.garbageCollectedBlocks, compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason), From 2e3f4a33aab4cdfa669928af1c71afebadd1d378 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 20:49:00 +0000 Subject: [PATCH 20/35] Fixed nil pointer in insBkt --- cmd/thanos/compact.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 960ca7737ec..84fe2a3a64c 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -258,12 +258,16 @@ func runCompact( return err } + var insBkt objstore.InstrumentedBucket + if tenantPrefix != "" { // For multi-tenant mode, we pass a nil registerer to avoid metric collisions // TODO (willh-db): revisit metrics structure for multi-tenant mode reg = nil + insBkt = objstoretracing.WrapWithTraces(bkt) + } else { + insBkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) } - insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) // Create tenant-specific logger tenantLogger := logger From 6635cad2b7caf3c43bd67358a0284c47d7bf72b0 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 20:59:29 +0000 Subject: [PATCH 21/35] Fixes on reg --- 
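Note (illustrative aside, not part of this patch): the back-and-forth between reg, tenantReg and a nil registerer in these commits comes down to Prometheus registration semantics. Registering the same unlabeled collector once per tenant on a single registry fails with an AlreadyRegisteredError, so a per-tenant loop has to either wrap the registerer with a distinguishing constant label or skip registration entirely. A minimal sketch of the label-wrapping approach, with a made-up metric name and tenant list:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	reg := prometheus.NewRegistry()
	for _, tenant := range []string{"tenant_a", "tenant_b"} {
		// The constant label keeps each tenant's series distinct, so the second
		// Register call succeeds instead of returning AlreadyRegisteredError.
		tenantReg := prometheus.WrapRegistererWith(prometheus.Labels{"tenant_prefix": tenant}, reg)
		c := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "example_compactions_total",
			Help: "Example counter registered once per tenant loop iteration.",
		})
		if err := tenantReg.Register(c); err != nil {
			fmt.Println("register failed:", err)
		}
	}
}

Dropping the wrap and calling reg.Register(c) directly would fail on the second tenant, which matches the collision the TODO comments in these commits are working around.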
cmd/thanos/compact.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 84fe2a3a64c..24d5f1f5c30 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -263,7 +263,6 @@ func runCompact( if tenantPrefix != "" { // For multi-tenant mode, we pass a nil registerer to avoid metric collisions // TODO (willh-db): revisit metrics structure for multi-tenant mode - reg = nil insBkt = objstoretracing.WrapWithTraces(bkt) } else { insBkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) @@ -300,7 +299,12 @@ func runCompact( noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig) - consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", reg))) + var consistencyDelayMetaFilter *block.ConsistencyDelayMetaFilter + if tenantPrefix != "" { + consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, nil) // TODO (willh-db): revisit metrics here + } else { + consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", reg))) + } timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) var blockLister block.Lister @@ -312,7 +316,12 @@ func runCompact( default: return errors.Errorf("unknown sync strategy %s", conf.blockListStrategy) } - baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) + var baseMetaFetcher *block.BaseFetcher + if tenantPrefix != "" { + baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, nil) // TODO (willh-db): revisit metrics here + } else { + baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) + } if err != nil { return errors.Wrap(err, "create meta fetcher") } From 89040d30ce2629f8bc066f76036dcf1bbc0cae8a Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 21:10:19 +0000 Subject: [PATCH 22/35] Made sure reg is nil for tenants --- cmd/thanos/compact.go | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 24d5f1f5c30..73beda0e7bf 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -263,6 +263,7 @@ func runCompact( if tenantPrefix != "" { // For multi-tenant mode, we pass a nil registerer to avoid metric collisions // TODO (willh-db): revisit metrics structure for multi-tenant mode + reg = nil insBkt = objstoretracing.WrapWithTraces(bkt) } else { insBkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) @@ -357,8 +358,12 @@ func runCompact( filters = append(filters, noDownsampleMarkerFilter) } // Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability. 
- cf := baseMetaFetcher.NewMetaFetcher( - extprom.WrapRegistererWithPrefix("thanos_", reg), filters) + var cf *block.MetaFetcher + if tenantPrefix != "" { + cf = baseMetaFetcher.NewMetaFetcher(nil, filters) // TODO (willh-db): revisit metrics here + } else { + cf = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_", reg), filters) + } cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetLoaded(blocks, err) }) @@ -707,7 +712,12 @@ func runCompact( // Separate fetcher for global view. // TODO(bwplotka): Allow Bucket UI to visualize the state of the block as well. - f := baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", reg), nil, "component", "globalBucketUI") + var f *block.MetaFetcher + if tenantPrefix != "" { + f = baseMetaFetcher.NewMetaFetcher(nil, nil, "component", "globalBucketUI") // TODO (willh-db): revisit metrics here + } else { + f = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", reg), nil, "component", "globalBucketUI") + } f.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetGlobal(blocks, err) }) From 588dd129e3d346cdcece8817f513fc8b79136941 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 3 Nov 2025 21:21:41 +0000 Subject: [PATCH 23/35] Brought back tenantReg --- cmd/thanos/compact.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 73beda0e7bf..684ca350725 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -259,14 +259,16 @@ func runCompact( } var insBkt objstore.InstrumentedBucket + var tenantReg prometheus.Registerer if tenantPrefix != "" { // For multi-tenant mode, we pass a nil registerer to avoid metric collisions // TODO (willh-db): revisit metrics structure for multi-tenant mode - reg = nil + tenantReg = nil insBkt = objstoretracing.WrapWithTraces(bkt) } else { - insBkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) + tenantReg = reg + insBkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", tenantReg), bkt.Name())) } // Create tenant-specific logger @@ -304,7 +306,7 @@ func runCompact( if tenantPrefix != "" { consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, nil) // TODO (willh-db): revisit metrics here } else { - consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", reg))) + consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", tenantReg))) } timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) @@ -321,7 +323,7 @@ func runCompact( if tenantPrefix != "" { baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, nil) // TODO (willh-db): revisit metrics here } else { - baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) + baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", tenantReg)) } if err != nil { return errors.Wrap(err, "create meta 
fetcher") @@ -362,7 +364,7 @@ func runCompact( if tenantPrefix != "" { cf = baseMetaFetcher.NewMetaFetcher(nil, filters) // TODO (willh-db): revisit metrics here } else { - cf = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_", reg), filters) + cf = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_", tenantReg), filters) } cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetLoaded(blocks, err) @@ -377,7 +379,7 @@ func runCompact( } sy, err = compact.NewMetaSyncer( logger, - reg, + tenantReg, insBkt, cf, duplicateBlocksFilter, @@ -427,7 +429,7 @@ func runCompact( // Instantiate the compactor with different time slices. Timestamps in TSDB // are in milliseconds. - comp, err := tsdb.NewLeveledCompactor(ctx, reg, tenantLogger, levels, downsample.NewPool(), mergeFunc) + comp, err := tsdb.NewLeveledCompactor(ctx, tenantReg, tenantLogger, levels, downsample.NewPool(), mergeFunc) if err != nil { return errors.Wrap(err, "create compactor") } @@ -450,7 +452,7 @@ func runCompact( insBkt, conf.acceptMalformedIndex, enableVerticalCompaction, - reg, + tenantReg, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), compactMetrics.garbageCollectedBlocks, compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason), @@ -716,7 +718,7 @@ func runCompact( if tenantPrefix != "" { f = baseMetaFetcher.NewMetaFetcher(nil, nil, "component", "globalBucketUI") // TODO (willh-db): revisit metrics here } else { - f = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", reg), nil, "component", "globalBucketUI") + f = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", tenantReg), nil, "component", "globalBucketUI") } f.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetGlobal(blocks, err) From 31924f9ea651717d3df901b5e537fccf107bc03c Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 10 Nov 2025 21:56:01 +0000 Subject: [PATCH 24/35] Changed tenant config file method --- cmd/thanos/compact.go | 48 +++++++++++++++++++++++++---------- cmd/thanos/downsample.go | 2 +- cmd/thanos/receive.go | 2 +- cmd/thanos/rule.go | 2 +- cmd/thanos/sidecar.go | 2 +- cmd/thanos/store.go | 2 +- cmd/thanos/tools_bucket.go | 52 ++++++++------------------------------ 7 files changed, 49 insertions(+), 61 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 684ca350725..245f02e0634 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -166,11 +166,19 @@ func newCompactMetrics(reg *prometheus.Registry, deleteDelay time.Duration) *com return m } -type MultiTenancyBucketConfig struct { - Type client.ObjProvider `yaml:"type"` - Config interface{} `yaml:"config"` - Prefix string `yaml:"prefix" default:""` - TenantPrefixes []string `yaml:"tenant_prefixes"` // Example value: "v1/raw/tenant_a,v1/raw/tenant_b,v1/raw/tenant_c" +// TenantConfig is the config file that contains the tenant prefix assignment for a pod that is running compactor. 
+type TenantConfig struct { + TenantPrefixes []string `yaml:"tenant_prefixes"` // Example value: "v1/raw/tenant_a,v1/raw/tenant_b,v1/raw/tenant_c" +} + +func (tc *TenantConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + if err := unmarshal(tc); err != nil { + return err + } + if tc.TenantPrefixes == nil { + tc.TenantPrefixes = []string{""} + } + return nil } func runCompact( @@ -210,20 +218,29 @@ func runCompact( srv.Shutdown(err) }) - // Note: We don't use getBucketConfigContentYaml here because we need to handle the case where the objStoreConfig is a MultiTenancyBucketConfig. confContentYaml, err := conf.objStore.Content() if err != nil { return err } - // Determine tenant prefixes to use (if provided) - var multiTenancyBucketConfig MultiTenancyBucketConfig - if err := yaml.Unmarshal(confContentYaml, &multiTenancyBucketConfig); err != nil { - return errors.Wrap(err, "failed to parse MultiTenancyBucketConfig") + var initialBucketConf client.BucketConfig + if err := yaml.Unmarshal(confContentYaml, &initialBucketConf); err != nil { + return errors.Wrap(err, "failed to parse bucket configuration") + } + + tenantConfigContentYaml, err := conf.tenantConfigFile.Content() + if err != nil { + level.Info(logger).Log("msg", "tenant configuration file not provided or invalid, assuming single-tenant mode") + tenantConfigContentYaml = []byte{} + } + + var tenantConfig TenantConfig + if err := yaml.Unmarshal(tenantConfigContentYaml, &tenantConfig); err != nil { + return errors.Wrap(err, "failed to parse tenant configuration") } var tenantPrefixes []string - tenantPrefixes = multiTenancyBucketConfig.TenantPrefixes + tenantPrefixes = tenantConfig.TenantPrefixes if len(tenantPrefixes) == 0 { tenantPrefixes = []string{""} level.Info(logger).Log("msg", "tenant prefixes not provided by init container, assuming single-tenant mode") @@ -238,9 +255,9 @@ func runCompact( for _, tenantPrefix := range tenantPrefixes { bucketConf := &client.BucketConfig{ - Type: multiTenancyBucketConfig.Type, - Config: multiTenancyBucketConfig.Config, - Prefix: path.Join(multiTenancyBucketConfig.Prefix, tenantPrefix), + Type: initialBucketConf.Type, + Config: initialBucketConf.Config, + Prefix: path.Join(initialBucketConf.Prefix, tenantPrefix), } level.Info(logger).Log("msg", "starting compaction loop", "prefix", bucketConf.Prefix) @@ -884,6 +901,7 @@ type compactConfig struct { progressCalculateInterval time.Duration filterConf *store.FilterConfig disableAdminOperations bool + tenantConfigFile extflag.PathOrContent } func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) { @@ -1000,6 +1018,8 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) { cc.selectorRelabelConf = *extkingpin.RegisterSelectorRelabelFlags(cmd) + cc.tenantConfigFile = *extflag.RegisterPathOrContent(cmd, "compact.object-storage-tenants-generated", "YAML file that contains the tenant prefix assignment for a pod that is running compactor.", extflag.WithEnvSubstitution()) + cc.webConf.registerFlag(cmd) cmd.Flag("bucket-web-label", "External block label to use as group title in the bucket web UI").StringVar(&cc.label) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index f72901650eb..556369b0a1f 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -79,7 +79,7 @@ func RunDownsample( comp component.Component, hashFunc metadata.HashFunc, ) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return err 
} diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index d690e57c9f0..4409972585b 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -215,7 +215,7 @@ func runReceive( } var bkt objstore.Bucket - confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) + confContentYaml, err := conf.objStoreConfig.Content() if err != nil { return err } diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 57ef8bf6b2c..41d996e0200 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -835,7 +835,7 @@ func runRule( }) } - confContentYaml, err := getBucketConfigContentYaml(conf.objStoreConfig) + confContentYaml, err := conf.objStoreConfig.Content() if err != nil { return err } diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 9520a42407f..127584ea947 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -130,7 +130,7 @@ func runSidecar( client: promclient.NewWithTracingClient(logger, httpClient, "thanos-sidecar"), } - confContentYaml, err := getBucketConfigContentYaml(&conf.objStore) + confContentYaml, err := conf.objStore.Content() if err != nil { return errors.Wrap(err, "getting object store config") } diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 959cd75c1df..795c108cd3f 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -306,7 +306,7 @@ func runStore( srv.Shutdown(err) }) - confContentYaml, err := getBucketConfigContentYaml(&conf.objStoreConfig) + confContentYaml, err := conf.objStoreConfig.Content() if err != nil { return err } diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index e36825d5b43..e0391af15b5 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -322,7 +322,7 @@ func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.Path "or compactor is ignoring the deletion because it's compacting the block at the same time."). 
Default("0s")) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return err } @@ -334,7 +334,7 @@ func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.Path insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") - backupconfContentYaml, err := getBucketConfigContentYaml(objStoreBackupConfig) + backupconfContentYaml, err := objStoreBackupConfig.Content() if err != nil { return err } @@ -406,7 +406,7 @@ func registerBucketLs(app extkingpin.AppClause, objStoreConfig *extflag.PathOrCo tbc.registerBucketLsFlag(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return err } @@ -514,7 +514,7 @@ func registerBucketInspect(app extkingpin.AppClause, objStoreConfig *extflag.Pat return errors.Wrap(err, "error parsing selector flag") } - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return err } @@ -624,7 +624,7 @@ func registerBucketWeb(app extkingpin.AppClause, objStoreConfig *extflag.PathOrC flagsMap := getFlagsMap(cmd.Flags()) - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return err } @@ -811,7 +811,7 @@ func registerBucketCleanup(app extkingpin.AppClause, objStoreConfig *extflag.Pat selectorRelabelConf := extkingpin.RegisterSelectorRelabelFlags(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return err } @@ -1079,7 +1079,7 @@ func registerBucketMarkBlock(app extkingpin.AppClause, objStoreConfig *extflag.P tbc.registerBucketMarkBlockFlag(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return err } @@ -1159,7 +1159,7 @@ func registerBucketRewrite(app extkingpin.AppClause, objStoreConfig *extflag.Pat toRelabel := extflag.RegisterPathOrContent(cmd, "rewrite.to-relabel-config", "YAML file that contains relabel configs that will be applied to blocks", extflag.WithEnvSubstitution()) provideChangeLog := cmd.Flag("rewrite.add-change-log", "If specified, all modifications are written to new block directory. 
Disable if latency is to high.").Default("true").Bool() cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return err } @@ -1357,7 +1357,7 @@ func registerBucketRetention(app extkingpin.AppClause, objStoreConfig *extflag.P level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) } - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return err } @@ -1457,7 +1457,7 @@ func registerBucketUploadBlocks(app extkingpin.AppClause, objStoreConfig *extfla return errors.Wrapf(err, "unable to access path '%s'", tbc.path) } - confContentYaml, err := getBucketConfigContentYaml(objStoreConfig) + confContentYaml, err := objStoreConfig.Content() if err != nil { return errors.Wrap(err, "unable to parse objstore config") } @@ -1488,35 +1488,3 @@ func registerBucketUploadBlocks(app extkingpin.AppClause, objStoreConfig *extfla return nil }) } - -// getBucketConfigContentYaml converts the objStoreConfig from a MultiTenancyBucketConfig to a BucketConfig and marshals it to a YAML string. -// If the objStoreConfig is already a BucketConfig without the tenant_prefixes field, it effectively does not change anything. -func getBucketConfigContentYaml(objStoreConfig *extflag.PathOrContent) ([]byte, error) { - confContentYamlWithTenantPrefixes, err := objStoreConfig.Content() - if err != nil { - return nil, err - } - - // If the config is empty, return empty immediately (for optional configs like sidecar) - if len(confContentYamlWithTenantPrefixes) == 0 { - return []byte{}, nil - } - - multiTenancyBucketConfig := &MultiTenancyBucketConfig{} - if err := yaml.Unmarshal(confContentYamlWithTenantPrefixes, multiTenancyBucketConfig); err != nil { - return nil, errors.Wrap(err, "failed to parse MultiTenancyBucketConfig when trying to convert to BucketConfig") - } - - bucketConf := &client.BucketConfig{ - Type: multiTenancyBucketConfig.Type, - Config: multiTenancyBucketConfig.Config, - Prefix: multiTenancyBucketConfig.Prefix, - } - - confContentYaml, err := yaml.Marshal(bucketConf) - if err != nil { - return nil, errors.Wrap(err, "failed to marshal BucketConfig when trying to convert from MultiTenancyBucketConfig") - } - - return confContentYaml, nil -} From a90099a3562165236e46c25fbb3ac50440dd3a8a Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 10 Nov 2025 23:19:40 +0000 Subject: [PATCH 25/35] Fixed compact test --- cmd/thanos/compact.go | 10 --- cmd/thanos/compact_test.go | 165 +++++++++++++++++++------------------ 2 files changed, 84 insertions(+), 91 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 245f02e0634..ea1cb9c841d 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -171,16 +171,6 @@ type TenantConfig struct { TenantPrefixes []string `yaml:"tenant_prefixes"` // Example value: "v1/raw/tenant_a,v1/raw/tenant_b,v1/raw/tenant_c" } -func (tc *TenantConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { - if err := unmarshal(tc); err != nil { - return err - } - if tc.TenantPrefixes == nil { - tc.TenantPrefixes = []string{""} - } - return nil -} - func runCompact( g *run.Group, logger log.Logger, diff --git a/cmd/thanos/compact_test.go b/cmd/thanos/compact_test.go 
index ab080885331..f07585726de 100644 --- a/cmd/thanos/compact_test.go +++ b/cmd/thanos/compact_test.go @@ -13,59 +13,39 @@ import ( "gopkg.in/yaml.v2" ) -func TestMultiTenancyBucketConfigMarshaling(t *testing.T) { +func TestTenantConfigMarshaling(t *testing.T) { tests := []struct { name string - config MultiTenancyBucketConfig + config TenantConfig expectedPrefixes []string }{ { name: "empty tenant prefixes", - config: MultiTenancyBucketConfig{ - Type: client.FILESYSTEM, - Config: map[string]interface{}{ - "directory": "/tmp/test", - }, - Prefix: "", + config: TenantConfig{ TenantPrefixes: []string{}, }, expectedPrefixes: []string{}, }, { name: "single tenant prefix", - config: MultiTenancyBucketConfig{ - Type: client.FILESYSTEM, - Config: map[string]interface{}{ - "directory": "/tmp/test", - }, - Prefix: "", + config: TenantConfig{ TenantPrefixes: []string{"tenant-a"}, }, expectedPrefixes: []string{"tenant-a"}, }, { name: "multiple tenant prefixes", - config: MultiTenancyBucketConfig{ - Type: client.FILESYSTEM, - Config: map[string]interface{}{ - "directory": "/tmp/test", - }, - Prefix: "", + config: TenantConfig{ TenantPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, }, expectedPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, }, { - name: "with base prefix", - config: MultiTenancyBucketConfig{ - Type: client.FILESYSTEM, - Config: map[string]interface{}{ - "directory": "/tmp/test", - }, - Prefix: "v1/raw/", - TenantPrefixes: []string{"tenant-a", "tenant-b"}, + name: "with full path prefixes", + config: TenantConfig{ + TenantPrefixes: []string{"v1/raw/tenant-a", "v1/raw/tenant-b"}, }, - expectedPrefixes: []string{"tenant-a", "tenant-b"}, + expectedPrefixes: []string{"v1/raw/tenant-a", "v1/raw/tenant-b"}, }, } @@ -76,14 +56,12 @@ func TestMultiTenancyBucketConfigMarshaling(t *testing.T) { testutil.Ok(t, err) // Unmarshal back - var unmarshaledConfig MultiTenancyBucketConfig + var unmarshaledConfig TenantConfig err = yaml.Unmarshal(yamlBytes, &unmarshaledConfig) testutil.Ok(t, err) - // Verify prefixes are preserved + // Verify prefixes are preserved (UnmarshalYAML converts empty to [""]) testutil.Equals(t, tt.expectedPrefixes, unmarshaledConfig.TenantPrefixes) - testutil.Equals(t, tt.config.Prefix, unmarshaledConfig.Prefix) - testutil.Equals(t, tt.config.Type, unmarshaledConfig.Type) }) } } @@ -91,18 +69,21 @@ func TestMultiTenancyBucketConfigMarshaling(t *testing.T) { func TestTenantPrefixBucketCreation(t *testing.T) { tests := []struct { name string - multiTenancyConfig MultiTenancyBucketConfig + bucketConfig client.BucketConfig + tenantConfig TenantConfig expectedPrefixes []string expectedEffectivePrefixes []string }{ { name: "single-tenant mode (no prefixes)", - multiTenancyConfig: MultiTenancyBucketConfig{ + bucketConfig: client.BucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ "directory": "/tmp/test", }, - Prefix: "", + Prefix: "", + }, + tenantConfig: TenantConfig{ TenantPrefixes: []string{}, }, expectedPrefixes: []string{""}, @@ -110,12 +91,14 @@ func TestTenantPrefixBucketCreation(t *testing.T) { }, { name: "multi-tenant mode with one tenant", - multiTenancyConfig: MultiTenancyBucketConfig{ + bucketConfig: client.BucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ "directory": "/tmp/test", }, - Prefix: "", + Prefix: "", + }, + tenantConfig: TenantConfig{ TenantPrefixes: []string{"tenant1"}, }, expectedPrefixes: []string{"tenant1"}, @@ -123,12 +106,14 @@ func TestTenantPrefixBucketCreation(t *testing.T) { }, { name: 
"multi-tenant mode with multiple tenants", - multiTenancyConfig: MultiTenancyBucketConfig{ + bucketConfig: client.BucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ "directory": "/tmp/test", }, - Prefix: "", + Prefix: "", + }, + tenantConfig: TenantConfig{ TenantPrefixes: []string{"tenant1", "tenant2", "tenant3"}, }, expectedPrefixes: []string{"tenant1", "tenant2", "tenant3"}, @@ -136,12 +121,14 @@ func TestTenantPrefixBucketCreation(t *testing.T) { }, { name: "multi-tenant mode with base prefix", - multiTenancyConfig: MultiTenancyBucketConfig{ + bucketConfig: client.BucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ "directory": "/tmp/test", }, - Prefix: "v1/raw/", + Prefix: "v1/raw/", + }, + tenantConfig: TenantConfig{ TenantPrefixes: []string{"tenant1", "tenant2"}, }, expectedPrefixes: []string{"tenant1", "tenant2"}, @@ -152,7 +139,7 @@ func TestTenantPrefixBucketCreation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Simulate what runCompact does - tenantPrefixes := tt.multiTenancyConfig.TenantPrefixes + tenantPrefixes := tt.tenantConfig.TenantPrefixes if len(tenantPrefixes) == 0 { tenantPrefixes = []string{""} } @@ -163,9 +150,9 @@ func TestTenantPrefixBucketCreation(t *testing.T) { var actualEffectivePrefixes []string for _, tenantPrefix := range tenantPrefixes { bucketConf := &client.BucketConfig{ - Type: tt.multiTenancyConfig.Type, - Config: tt.multiTenancyConfig.Config, - Prefix: path.Join(tt.multiTenancyConfig.Prefix, tenantPrefix), + Type: tt.bucketConfig.Type, + Config: tt.bucketConfig.Config, + Prefix: path.Join(tt.bucketConfig.Prefix, tenantPrefix), } actualEffectivePrefixes = append(actualEffectivePrefixes, bucketConf.Prefix) @@ -245,22 +232,25 @@ func TestBucketConfigPrefixPreservation(t *testing.T) { func TestTenantPrefixesFromYAML(t *testing.T) { tests := []struct { name string - yamlConfig string + tenantYamlConfig string + bucketYamlConfig string expectedTenantPrefixes []string expectedPrefix string expectedType client.ObjProvider }{ { name: "multi-tenant config without base prefix", - yamlConfig: ` -type: FILESYSTEM -config: - directory: /tmp/test -prefix: "" + tenantYamlConfig: ` tenant_prefixes: - tenant-alpha - tenant-beta - tenant-gamma +`, + bucketYamlConfig: ` +type: FILESYSTEM +config: + directory: /tmp/test +prefix: "" `, expectedTenantPrefixes: []string{"tenant-alpha", "tenant-beta", "tenant-gamma"}, expectedPrefix: "", @@ -268,22 +258,25 @@ tenant_prefixes: }, { name: "multi-tenant config with base prefix", - yamlConfig: ` + tenantYamlConfig: ` +tenant_prefixes: + - tenant-a + - tenant-b +`, + bucketYamlConfig: ` type: FILESYSTEM config: directory: /tmp/test prefix: "v1/raw/" -tenant_prefixes: - - tenant-a - - tenant-b `, expectedTenantPrefixes: []string{"tenant-a", "tenant-b"}, expectedPrefix: "v1/raw/", expectedType: client.FILESYSTEM, }, { - name: "single-tenant config (no tenant_prefixes)", - yamlConfig: ` + name: "single-tenant config (empty YAML)", + tenantYamlConfig: `{}`, + bucketYamlConfig: ` type: FILESYSTEM config: directory: /tmp/test @@ -295,15 +288,17 @@ prefix: "" }, { name: "S3 multi-tenant config", - yamlConfig: ` + tenantYamlConfig: ` +tenant_prefixes: + - org1 + - org2 +`, + bucketYamlConfig: ` type: S3 config: bucket: test-bucket endpoint: s3.amazonaws.com prefix: "data/" -tenant_prefixes: - - org1 - - org2 `, expectedTenantPrefixes: []string{"org1", "org2"}, expectedPrefix: "data/", @@ -313,36 +308,44 @@ tenant_prefixes: for _, tt := range tests { t.Run(tt.name, 
func(t *testing.T) { - var multiTenancyConfig MultiTenancyBucketConfig - err := yaml.Unmarshal([]byte(tt.yamlConfig), &multiTenancyConfig) + var tenantConfig TenantConfig + err := yaml.Unmarshal([]byte(tt.tenantYamlConfig), &tenantConfig) + testutil.Ok(t, err) + + var bucketConfig client.BucketConfig + err = yaml.Unmarshal([]byte(tt.bucketYamlConfig), &bucketConfig) testutil.Ok(t, err) // Verify all fields are correctly parsed - // For TenantPrefixes, both nil and empty slice are equivalent - if tt.expectedTenantPrefixes == nil && len(multiTenancyConfig.TenantPrefixes) == 0 { - // Both nil and [] are acceptable for "no tenant prefixes" - } else { - testutil.Equals(t, tt.expectedTenantPrefixes, multiTenancyConfig.TenantPrefixes) - } - testutil.Equals(t, tt.expectedPrefix, multiTenancyConfig.Prefix) - testutil.Equals(t, tt.expectedType, multiTenancyConfig.Type) + testutil.Equals(t, tt.expectedTenantPrefixes, tenantConfig.TenantPrefixes) + testutil.Equals(t, tt.expectedPrefix, bucketConfig.Prefix) + testutil.Equals(t, tt.expectedType, bucketConfig.Type) // Verify we can marshal back - yamlBytes, err := yaml.Marshal(multiTenancyConfig) + tenantYamlBytes, err := yaml.Marshal(tenantConfig) + testutil.Ok(t, err) + + bucketYamlBytes, err := yaml.Marshal(bucketConfig) testutil.Ok(t, err) // Verify we can unmarshal again and get the same result - var roundtripConfig MultiTenancyBucketConfig - err = yaml.Unmarshal(yamlBytes, &roundtripConfig) + var roundtripTenantConfig TenantConfig + err = yaml.Unmarshal(tenantYamlBytes, &roundtripTenantConfig) testutil.Ok(t, err) - // For roundtrip, verify lengths match (handles nil vs [] equivalence) - testutil.Equals(t, len(multiTenancyConfig.TenantPrefixes), len(roundtripConfig.TenantPrefixes)) - if len(multiTenancyConfig.TenantPrefixes) > 0 { - testutil.Equals(t, multiTenancyConfig.TenantPrefixes, roundtripConfig.TenantPrefixes) + var roundtripBucketConfig client.BucketConfig + err = yaml.Unmarshal(bucketYamlBytes, &roundtripBucketConfig) + testutil.Ok(t, err) + + // For roundtrip, verify configs match + // Note: nil and empty slice are equivalent for our purposes + if len(tenantConfig.TenantPrefixes) == 0 && len(roundtripTenantConfig.TenantPrefixes) == 0 { + // Both are empty (nil or []string{}), which is fine + } else { + testutil.Equals(t, tenantConfig.TenantPrefixes, roundtripTenantConfig.TenantPrefixes) } - testutil.Equals(t, multiTenancyConfig.Prefix, roundtripConfig.Prefix) - testutil.Equals(t, multiTenancyConfig.Type, roundtripConfig.Type) + testutil.Equals(t, bucketConfig.Prefix, roundtripBucketConfig.Prefix) + testutil.Equals(t, bucketConfig.Type, roundtripBucketConfig.Type) }) } } From 9a2f080d145866c459534c9018497f2f878bb5f5 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Tue, 11 Nov 2025 00:32:26 +0000 Subject: [PATCH 26/35] Changed cmd line flag --- cmd/thanos/compact.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index ea1cb9c841d..f4e56c2b93b 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -1008,7 +1008,7 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) { cc.selectorRelabelConf = *extkingpin.RegisterSelectorRelabelFlags(cmd) - cc.tenantConfigFile = *extflag.RegisterPathOrContent(cmd, "compact.object-storage-tenants-generated", "YAML file that contains the tenant prefix assignment for a pod that is running compactor.", extflag.WithEnvSubstitution()) + cc.tenantConfigFile = *extflag.RegisterPathOrContent(cmd, 
"compact.tenant-config", "YAML file that contains the tenant prefix assignment for a pod that is running compactor.", extflag.WithEnvSubstitution()) cc.webConf.registerFlag(cmd) From fb8a05c4e1fdc50d186b05174f388eeada200155 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 1 Dec 2025 18:58:18 +0000 Subject: [PATCH 27/35] New version of tenant partitoning --- cmd/thanos/compact.go | 98 +++++++++++++++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 22 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index f4e56c2b93b..f378d8f18b6 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -100,6 +100,11 @@ func registerCompact(app *extkingpin.App) { }) } +func extractOrdinalFromHostname(hostname string) (int, error) { + parts := strings.Split(hostname, "-") + return strconv.Atoi(parts[len(parts)-1]) +} + type compactMetrics struct { halted prometheus.Gauge retried prometheus.Counter @@ -166,11 +171,6 @@ func newCompactMetrics(reg *prometheus.Registry, deleteDelay time.Duration) *com return m } -// TenantConfig is the config file that contains the tenant prefix assignment for a pod that is running compactor. -type TenantConfig struct { - TenantPrefixes []string `yaml:"tenant_prefixes"` // Example value: "v1/raw/tenant_a,v1/raw/tenant_b,v1/raw/tenant_c" -} - func runCompact( g *run.Group, logger log.Logger, @@ -218,24 +218,66 @@ func runCompact( return errors.Wrap(err, "failed to parse bucket configuration") } - tenantConfigContentYaml, err := conf.tenantConfigFile.Content() - if err != nil { - level.Info(logger).Log("msg", "tenant configuration file not provided or invalid, assuming single-tenant mode") - tenantConfigContentYaml = []byte{} - } + // Setup tenant partitioning if enabled + var tenantPrefixes []string - var tenantConfig TenantConfig - if err := yaml.Unmarshal(tenantConfigContentYaml, &tenantConfig); err != nil { - return errors.Wrap(err, "failed to parse tenant configuration") - } + if conf.enableTenantPathPrefix { - var tenantPrefixes []string - tenantPrefixes = tenantConfig.TenantPrefixes - if len(tenantPrefixes) == 0 { - tenantPrefixes = []string{""} - level.Info(logger).Log("msg", "tenant prefixes not provided by init container, assuming single-tenant mode") + hostname := os.Getenv("HOSTNAME") + ordinal, err := extractOrdinalFromHostname(hostname) + if err != nil { + return errors.Wrapf(err, "failed to extract ordinal from hostname %s", hostname) + } + + if ordinal >= conf.totalShards { + return errors.Errorf("ordinal (%d) must be less than total-shards (%d)", ordinal, conf.totalShards) + } + + // Read tenant weights file path + tenantWeightsPath := conf.tenantWeightsFile.Path() + if tenantWeightsPath == "" { + return errors.New("tenant weights file required when using tenant partitioning") + } + + // Create a temporary bucket for tenant discovery (without prefix) + discoveryBkt, err := client.NewBucket(logger, confContentYaml, component.String(), nil) + if err != nil { + return errors.Wrap(err, "failed to create discovery bucket") + } + defer runutil.CloseWithLogOnErr(logger, discoveryBkt, "discovery bucket client") + + level.Info(logger).Log("msg", "setting up tenant partitioning", + "ordinal", ordinal, + "total_shards", conf.totalShards, + "common_path_prefix", conf.commonPathPrefix) + + // Get tenant assignments for this shard + ctx := context.Background() + tenantAssignments, err := compact.SetupTenantPartitioning(ctx, discoveryBkt, logger, tenantWeightsPath, conf.commonPathPrefix, conf.totalShards) + if err != 
nil { + return errors.Wrap(err, "failed to setup tenant partitioning") + } + + // Get tenants assigned to this shard + assignedTenants := tenantAssignments[ordinal] + if len(assignedTenants) == 0 { + level.Warn(logger).Log("msg", "no tenants assigned to this shard", "ordinal", ordinal) + return nil // No tenants to compact + } + + // Build tenant prefixes from assigned tenants + for _, tenant := range assignedTenants { + tenantPrefixes = append(tenantPrefixes, path.Join(conf.commonPathPrefix, tenant)) + } + + level.Info(logger).Log("msg", "tenant partitioning setup complete", + "ordinal", ordinal, + "assigned_tenants", len(assignedTenants), + "tenants", strings.Join(assignedTenants, ",")) } else { - level.Info(logger).Log("msg", "tenant prefixes found, running in multi-tenant mode", "prefixes", strings.Join(tenantPrefixes, ",")) + // Single-tenant mode + tenantPrefixes = []string{""} + level.Info(logger).Log("msg", "running in single-tenant mode") } overlappingCallback := compact.NewOverlappingCompactionLifecycleCallback(reg, logger, conf.enableOverlappingRemoval) @@ -891,7 +933,10 @@ type compactConfig struct { progressCalculateInterval time.Duration filterConf *store.FilterConfig disableAdminOperations bool - tenantConfigFile extflag.PathOrContent + tenantWeightsFile extflag.PathOrContent + totalShards int + commonPathPrefix string + enableTenantPathPrefix bool } func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) { @@ -1008,7 +1053,16 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) { cc.selectorRelabelConf = *extkingpin.RegisterSelectorRelabelFlags(cmd) - cc.tenantConfigFile = *extflag.RegisterPathOrContent(cmd, "compact.tenant-config", "YAML file that contains the tenant prefix assignment for a pod that is running compactor.", extflag.WithEnvSubstitution()) + cc.tenantWeightsFile = *extflag.RegisterPathOrContent(cmd, "compact.tenant-weights", "YAML file that contains the tenant weights for tenant partitioning.", extflag.WithEnvSubstitution()) + + cmd.Flag("compact.total-shards", "Total number of shards when using tenant partitioning."). + Default("1").IntVar(&cc.totalShards) + + cmd.Flag("compact.common-path-prefix", "Common path prefix for tenant discovery when using tenant partitioning. This is the prefix before the tenant name in the object storage path."). + Default("v1/raw/").StringVar(&cc.commonPathPrefix) + + cmd.Flag("compact.enable-tenant-path-prefix", "Enable tenant path prefix mode for backward compatibility. When disabled, compactor runs in single-tenant mode."). 
+ Default("false").BoolVar(&cc.enableTenantPathPrefix) cc.webConf.registerFlag(cmd) From 2da9e2e77de7a2dc2d6cc19636d687e6de0ccea0 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 1 Dec 2025 19:04:04 +0000 Subject: [PATCH 28/35] fixed tests --- cmd/thanos/compact_test.go | 225 +++++++++++++++---------------------- 1 file changed, 88 insertions(+), 137 deletions(-) diff --git a/cmd/thanos/compact_test.go b/cmd/thanos/compact_test.go index f07585726de..633009b4b4e 100644 --- a/cmd/thanos/compact_test.go +++ b/cmd/thanos/compact_test.go @@ -13,55 +13,69 @@ import ( "gopkg.in/yaml.v2" ) -func TestTenantConfigMarshaling(t *testing.T) { +func TestExtractOrdinalFromHostname(t *testing.T) { tests := []struct { - name string - config TenantConfig - expectedPrefixes []string + name string + hostname string + expectedOrdinal int + expectError bool }{ { - name: "empty tenant prefixes", - config: TenantConfig{ - TenantPrefixes: []string{}, - }, - expectedPrefixes: []string{}, + name: "statefulset hostname with single digit", + hostname: "pantheon-compactor-0", + expectedOrdinal: 0, + expectError: false, }, { - name: "single tenant prefix", - config: TenantConfig{ - TenantPrefixes: []string{"tenant-a"}, - }, - expectedPrefixes: []string{"tenant-a"}, + name: "statefulset hostname with double digit", + hostname: "pantheon-compactor-15", + expectedOrdinal: 15, + expectError: false, }, { - name: "multiple tenant prefixes", - config: TenantConfig{ - TenantPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, - }, - expectedPrefixes: []string{"tenant-a", "tenant-b", "tenant-c"}, + name: "statefulset hostname with triple digit", + hostname: "pantheon-compactor-999", + expectedOrdinal: 999, + expectError: false, }, { - name: "with full path prefixes", - config: TenantConfig{ - TenantPrefixes: []string{"v1/raw/tenant-a", "v1/raw/tenant-b"}, - }, - expectedPrefixes: []string{"v1/raw/tenant-a", "v1/raw/tenant-b"}, + name: "kubernetes statefulset with namespace", + hostname: "pantheon-compactor-2", + expectedOrdinal: 2, + expectError: false, + }, + { + name: "complex statefulset name", + hostname: "pantheon-compactor-7", + expectedOrdinal: 7, + expectError: false, + }, + { + name: "hostname without number", + hostname: "pantheoncompactor", + expectError: true, + }, + { + name: "hostname with invalid suffix", + hostname: "pantheon-compactor-abc", + expectError: true, + }, + { + name: "empty hostname", + hostname: "", + expectError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Marshal to YAML - yamlBytes, err := yaml.Marshal(tt.config) - testutil.Ok(t, err) - - // Unmarshal back - var unmarshaledConfig TenantConfig - err = yaml.Unmarshal(yamlBytes, &unmarshaledConfig) - testutil.Ok(t, err) - - // Verify prefixes are preserved (UnmarshalYAML converts empty to [""]) - testutil.Equals(t, tt.expectedPrefixes, unmarshaledConfig.TenantPrefixes) + ordinal, err := extractOrdinalFromHostname(tt.hostname) + if tt.expectError { + testutil.NotOk(t, err) + } else { + testutil.Ok(t, err) + testutil.Equals(t, tt.expectedOrdinal, ordinal) + } }) } } @@ -70,8 +84,7 @@ func TestTenantPrefixBucketCreation(t *testing.T) { tests := []struct { name string bucketConfig client.BucketConfig - tenantConfig TenantConfig - expectedPrefixes []string + tenantPrefixes []string expectedEffectivePrefixes []string }{ { @@ -83,14 +96,11 @@ func TestTenantPrefixBucketCreation(t *testing.T) { }, Prefix: "", }, - tenantConfig: TenantConfig{ - TenantPrefixes: []string{}, - }, - expectedPrefixes: 
[]string{""}, + tenantPrefixes: []string{""}, expectedEffectivePrefixes: []string{""}, }, { - name: "multi-tenant mode with one tenant", + name: "tenant partitioning with one tenant", bucketConfig: client.BucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ @@ -98,14 +108,11 @@ func TestTenantPrefixBucketCreation(t *testing.T) { }, Prefix: "", }, - tenantConfig: TenantConfig{ - TenantPrefixes: []string{"tenant1"}, - }, - expectedPrefixes: []string{"tenant1"}, - expectedEffectivePrefixes: []string{"tenant1"}, + tenantPrefixes: []string{"v1/raw/tenant1"}, + expectedEffectivePrefixes: []string{"v1/raw/tenant1"}, }, { - name: "multi-tenant mode with multiple tenants", + name: "tenant partitioning with multiple tenants", bucketConfig: client.BucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ @@ -113,42 +120,28 @@ func TestTenantPrefixBucketCreation(t *testing.T) { }, Prefix: "", }, - tenantConfig: TenantConfig{ - TenantPrefixes: []string{"tenant1", "tenant2", "tenant3"}, - }, - expectedPrefixes: []string{"tenant1", "tenant2", "tenant3"}, - expectedEffectivePrefixes: []string{"tenant1", "tenant2", "tenant3"}, + tenantPrefixes: []string{"v1/raw/tenant1", "v1/raw/tenant2", "v1/raw/tenant3"}, + expectedEffectivePrefixes: []string{"v1/raw/tenant1", "v1/raw/tenant2", "v1/raw/tenant3"}, }, { - name: "multi-tenant mode with base prefix", + name: "tenant partitioning with base prefix and tenant paths", bucketConfig: client.BucketConfig{ Type: client.FILESYSTEM, Config: map[string]interface{}{ "directory": "/tmp/test", }, - Prefix: "v1/raw/", - }, - tenantConfig: TenantConfig{ - TenantPrefixes: []string{"tenant1", "tenant2"}, + Prefix: "base/", }, - expectedPrefixes: []string{"tenant1", "tenant2"}, - expectedEffectivePrefixes: []string{"v1/raw/tenant1", "v1/raw/tenant2"}, + tenantPrefixes: []string{"v1/raw/tenant1", "v1/raw/tenant2"}, + expectedEffectivePrefixes: []string{"base/v1/raw/tenant1", "base/v1/raw/tenant2"}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Simulate what runCompact does - tenantPrefixes := tt.tenantConfig.TenantPrefixes - if len(tenantPrefixes) == 0 { - tenantPrefixes = []string{""} - } - - testutil.Equals(t, tt.expectedPrefixes, tenantPrefixes) - - // Simulate bucket creation for each tenant + // Simulate bucket creation for each tenant prefix var actualEffectivePrefixes []string - for _, tenantPrefix := range tenantPrefixes { + for _, tenantPrefix := range tt.tenantPrefixes { bucketConf := &client.BucketConfig{ Type: tt.bucketConfig.Type, Config: tt.bucketConfig.Config, @@ -229,121 +222,79 @@ func TestBucketConfigPrefixPreservation(t *testing.T) { } } -func TestTenantPrefixesFromYAML(t *testing.T) { +func TestBucketConfigFromYAML(t *testing.T) { tests := []struct { - name string - tenantYamlConfig string - bucketYamlConfig string - expectedTenantPrefixes []string - expectedPrefix string - expectedType client.ObjProvider + name string + bucketYamlConfig string + expectedPrefix string + expectedType client.ObjProvider }{ { - name: "multi-tenant config without base prefix", - tenantYamlConfig: ` -tenant_prefixes: - - tenant-alpha - - tenant-beta - - tenant-gamma -`, + name: "filesystem without prefix", bucketYamlConfig: ` type: FILESYSTEM config: directory: /tmp/test prefix: "" `, - expectedTenantPrefixes: []string{"tenant-alpha", "tenant-beta", "tenant-gamma"}, - expectedPrefix: "", - expectedType: client.FILESYSTEM, + expectedPrefix: "", + expectedType: client.FILESYSTEM, }, { - name: "multi-tenant config with base 
prefix", - tenantYamlConfig: ` -tenant_prefixes: - - tenant-a - - tenant-b -`, + name: "filesystem with v1/raw prefix", bucketYamlConfig: ` type: FILESYSTEM config: directory: /tmp/test prefix: "v1/raw/" `, - expectedTenantPrefixes: []string{"tenant-a", "tenant-b"}, - expectedPrefix: "v1/raw/", - expectedType: client.FILESYSTEM, + expectedPrefix: "v1/raw/", + expectedType: client.FILESYSTEM, }, { - name: "single-tenant config (empty YAML)", - tenantYamlConfig: `{}`, + name: "S3 with data prefix", bucketYamlConfig: ` -type: FILESYSTEM +type: S3 config: - directory: /tmp/test -prefix: "" + bucket: test-bucket + endpoint: s3.amazonaws.com +prefix: "data/" `, - expectedTenantPrefixes: nil, - expectedPrefix: "", - expectedType: client.FILESYSTEM, + expectedPrefix: "data/", + expectedType: client.S3, }, { - name: "S3 multi-tenant config", - tenantYamlConfig: ` -tenant_prefixes: - - org1 - - org2 -`, + name: "filesystem with tenant path", bucketYamlConfig: ` -type: S3 +type: FILESYSTEM config: - bucket: test-bucket - endpoint: s3.amazonaws.com -prefix: "data/" + directory: /tmp/test +prefix: "v1/raw/tenant-alpha" `, - expectedTenantPrefixes: []string{"org1", "org2"}, - expectedPrefix: "data/", - expectedType: client.S3, + expectedPrefix: "v1/raw/tenant-alpha", + expectedType: client.FILESYSTEM, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - var tenantConfig TenantConfig - err := yaml.Unmarshal([]byte(tt.tenantYamlConfig), &tenantConfig) - testutil.Ok(t, err) - var bucketConfig client.BucketConfig - err = yaml.Unmarshal([]byte(tt.bucketYamlConfig), &bucketConfig) + err := yaml.Unmarshal([]byte(tt.bucketYamlConfig), &bucketConfig) testutil.Ok(t, err) // Verify all fields are correctly parsed - testutil.Equals(t, tt.expectedTenantPrefixes, tenantConfig.TenantPrefixes) testutil.Equals(t, tt.expectedPrefix, bucketConfig.Prefix) testutil.Equals(t, tt.expectedType, bucketConfig.Type) // Verify we can marshal back - tenantYamlBytes, err := yaml.Marshal(tenantConfig) - testutil.Ok(t, err) - bucketYamlBytes, err := yaml.Marshal(bucketConfig) testutil.Ok(t, err) // Verify we can unmarshal again and get the same result - var roundtripTenantConfig TenantConfig - err = yaml.Unmarshal(tenantYamlBytes, &roundtripTenantConfig) - testutil.Ok(t, err) - var roundtripBucketConfig client.BucketConfig err = yaml.Unmarshal(bucketYamlBytes, &roundtripBucketConfig) testutil.Ok(t, err) - // For roundtrip, verify configs match - // Note: nil and empty slice are equivalent for our purposes - if len(tenantConfig.TenantPrefixes) == 0 && len(roundtripTenantConfig.TenantPrefixes) == 0 { - // Both are empty (nil or []string{}), which is fine - } else { - testutil.Equals(t, tenantConfig.TenantPrefixes, roundtripTenantConfig.TenantPrefixes) - } testutil.Equals(t, bucketConfig.Prefix, roundtripBucketConfig.Prefix) testutil.Equals(t, bucketConfig.Type, roundtripBucketConfig.Type) }) From 716be9457d200dfedad16702518bf150c1b6593c Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 1 Dec 2025 22:27:04 +0000 Subject: [PATCH 29/35] Compute total shards --- cmd/thanos/compact.go | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index f378d8f18b6..2a12d87fff1 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -229,8 +229,13 @@ func runCompact( return errors.Wrapf(err, "failed to extract ordinal from hostname %s", hostname) } - if ordinal >= conf.totalShards { - return errors.Errorf("ordinal (%d) must be 
less than total-shards (%d)", ordinal, conf.totalShards) + totalShards := conf.replicas / conf.replicationFactor + if conf.replicas%conf.replicationFactor != 0 || totalShards < 1 { + return errors.Errorf("replicas (%d) must be divisible by replication-factor (%d) and total-shards must be greater than 0", conf.replicas, conf.replicationFactor) + } + + if ordinal >= totalShards { + return errors.Errorf("ordinal (%d) must be less than total-shards (%d)", ordinal, totalShards) } // Read tenant weights file path @@ -248,12 +253,12 @@ func runCompact( level.Info(logger).Log("msg", "setting up tenant partitioning", "ordinal", ordinal, - "total_shards", conf.totalShards, + "total_shards", totalShards, "common_path_prefix", conf.commonPathPrefix) // Get tenant assignments for this shard ctx := context.Background() - tenantAssignments, err := compact.SetupTenantPartitioning(ctx, discoveryBkt, logger, tenantWeightsPath, conf.commonPathPrefix, conf.totalShards) + tenantAssignments, err := compact.SetupTenantPartitioning(ctx, discoveryBkt, logger, tenantWeightsPath, conf.commonPathPrefix, totalShards) if err != nil { return errors.Wrap(err, "failed to setup tenant partitioning") } @@ -934,7 +939,8 @@ type compactConfig struct { filterConf *store.FilterConfig disableAdminOperations bool tenantWeightsFile extflag.PathOrContent - totalShards int + replicas int + replicationFactor int commonPathPrefix string enableTenantPathPrefix bool } @@ -1055,8 +1061,11 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) { cc.tenantWeightsFile = *extflag.RegisterPathOrContent(cmd, "compact.tenant-weights", "YAML file that contains the tenant weights for tenant partitioning.", extflag.WithEnvSubstitution()) - cmd.Flag("compact.total-shards", "Total number of shards when using tenant partitioning."). - Default("1").IntVar(&cc.totalShards) + cmd.Flag("compact.replicas", "Total replicas of the stateful set."). + Default("1").IntVar(&cc.replicas) + + cmd.Flag("compact.replication-factor", "Replication factor of the stateful set."). + Default("1").IntVar(&cc.replicationFactor) cmd.Flag("compact.common-path-prefix", "Common path prefix for tenant discovery when using tenant partitioning. This is the prefix before the tenant name in the object storage path."). Default("v1/raw/").StringVar(&cc.commonPathPrefix) From e4e85a189c3c4a1a5a36c725f0e00282b55fc1ab Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 1 Dec 2025 22:35:13 +0000 Subject: [PATCH 30/35] rename tenant-weights-file --- cmd/thanos/compact.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 2a12d87fff1..9e323f1ad27 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -1059,7 +1059,7 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) { cc.selectorRelabelConf = *extkingpin.RegisterSelectorRelabelFlags(cmd) - cc.tenantWeightsFile = *extflag.RegisterPathOrContent(cmd, "compact.tenant-weights", "YAML file that contains the tenant weights for tenant partitioning.", extflag.WithEnvSubstitution()) + cc.tenantWeightsFile = *extflag.RegisterPathOrContent(cmd, "compact.tenant-weights-file", "YAML file that contains the tenant weights for tenant partitioning.", extflag.WithEnvSubstitution()) cmd.Flag("compact.replicas", "Total replicas of the stateful set."). 
Default("1").IntVar(&cc.replicas) From 988af60c30bb9bb3b3375c70b2bc924b52253d3e Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Tue, 2 Dec 2025 00:42:22 +0000 Subject: [PATCH 31/35] Changed tenantLogger and tenantReg --- cmd/thanos/compact.go | 51 +++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 9e323f1ad27..cb16cfd837f 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -326,9 +326,8 @@ func runCompact( } // Create tenant-specific logger - tenantLogger := logger if tenantPrefix != "" { - tenantLogger = log.With(logger, "tenant_prefix", tenantPrefix) + logger = log.With(logger, "tenant_prefix", tenantPrefix) } relabelContentYaml, err := conf.selectorRelabelConf.Content() @@ -377,7 +376,7 @@ func runCompact( if tenantPrefix != "" { baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, nil) // TODO (willh-db): revisit metrics here } else { - baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", tenantReg)) + baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) } if err != nil { return errors.Wrap(err, "create meta fetcher") @@ -387,12 +386,12 @@ func runCompact( dedupReplicaLabels := strutil.ParseFlagLabels(conf.dedupReplicaLabels) if len(dedupReplicaLabels) > 0 { enableVerticalCompaction = true - level.Info(tenantLogger).Log( + level.Info(logger).Log( "msg", "deduplication.replica-label specified, enabling vertical compaction", "dedupReplicaLabels", strings.Join(dedupReplicaLabels, ","), ) } if enableVerticalCompaction { - level.Info(tenantLogger).Log( + level.Info(logger).Log( "msg", "vertical compaction is enabled", "compact.enable-vertical-compaction", fmt.Sprintf("%v", conf.enableVerticalCompaction), ) } @@ -418,7 +417,7 @@ func runCompact( if tenantPrefix != "" { cf = baseMetaFetcher.NewMetaFetcher(nil, filters) // TODO (willh-db): revisit metrics here } else { - cf = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_", tenantReg), filters) + cf = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_", reg), filters) } cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetLoaded(blocks, err) @@ -453,7 +452,7 @@ func runCompact( } if conf.maxCompactionLevel < compactions.maxLevel() { - level.Warn(tenantLogger).Log("msg", "Max compaction level is lower than should be", "current", conf.maxCompactionLevel, "default", compactions.maxLevel()) + level.Warn(logger).Log("msg", "Max compaction level is lower than should be", "current", conf.maxCompactionLevel, "default", compactions.maxLevel()) } ctx, cancel := context.WithCancel(context.Background()) @@ -483,7 +482,7 @@ func runCompact( // Instantiate the compactor with different time slices. Timestamps in TSDB // are in milliseconds. 
- comp, err := tsdb.NewLeveledCompactor(ctx, tenantReg, tenantLogger, levels, downsample.NewPool(), mergeFunc) + comp, err := tsdb.NewLeveledCompactor(ctx, tenantReg, logger, levels, downsample.NewPool(), mergeFunc) if err != nil { return errors.Wrap(err, "create compactor") } @@ -557,22 +556,22 @@ func runCompact( if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() < downsample.ResLevel1DownsampleRange { return errors.New("raw resolution must be higher than the minimum block size after which 5m resolution downsampling will occur (40 hours)") } - level.Info(tenantLogger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw]) + level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw]) } if retentionByResolution[compact.ResolutionLevel5m].Milliseconds() != 0 { // If retention is lower than minimum downsample range, then no downsampling at this resolution will be persisted if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Milliseconds() < downsample.ResLevel2DownsampleRange { return errors.New("5m resolution retention must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") } - level.Info(tenantLogger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m]) + level.Info(logger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m]) } if retentionByResolution[compact.ResolutionLevel1h].Milliseconds() != 0 { - level.Info(tenantLogger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) + level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) } retentionByTenant, err := compact.ParesRetentionPolicyByTenant(logger, *conf.retentionTenants) if err != nil { - level.Error(tenantLogger).Log("msg", "failed to parse retention policy by tenant", "err", err) + level.Error(logger).Log("msg", "failed to parse retention policy by tenant", "err", err) return err } @@ -588,7 +587,7 @@ func runCompact( } progress.Set(compact.CleanBlocks) - compact.BestEffortCleanAbortedPartialUploads(ctx, tenantLogger, sy.Partial(), insBkt, compactMetrics.partialUploadDeleteAttempts, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) + compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), insBkt, compactMetrics.partialUploadDeleteAttempts, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil { return errors.Wrap(err, "cleaning marked blocks") } @@ -601,7 +600,7 @@ func runCompact( defer progress.Idle() // this should happen before any compaction to remove unnecessary process on backlogs beyond retention. 
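// Illustrative sketch, not part of the patch: wrapping the loop's logger with
// log.With (as done above) stamps every subsequent log line with the tenant prefix.
// The prefix value is hypothetical.
package main

import (
	"os"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
)

func main() {
	logger := log.NewLogfmtLogger(os.Stderr)
	tenantLogger := log.With(logger, "tenant_prefix", "v1/raw/tenant-a")
	// Emits a logfmt line carrying level=info, tenant_prefix=v1/raw/tenant-a and the msg key.
	level.Info(tenantLogger).Log("msg", "compaction iteration started")
}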
if len(retentionByTenant) != 0 && len(sy.Metas()) == 0 { - level.Info(tenantLogger).Log("msg", "sync before tenant retention due to no blocks") + level.Info(logger).Log("msg", "sync before tenant retention due to no blocks") progress.Set(compact.SyncMeta) if err := sy.SyncMetas(ctx); err != nil { return errors.Wrap(err, "sync before tenant retention") @@ -609,7 +608,7 @@ func runCompact( } progress.Set(compact.ApplyRetention) - if err := compact.ApplyRetentionPolicyByTenant(ctx, tenantLogger, insBkt, sy.Metas(), retentionByTenant, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, metadata.TenantRetentionExpired)); err != nil { + if err := compact.ApplyRetentionPolicyByTenant(ctx, logger, insBkt, sy.Metas(), retentionByTenant, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, metadata.TenantRetentionExpired)); err != nil { return errors.Wrap(err, "retention by tenant failed") } @@ -621,7 +620,7 @@ func runCompact( // After all compactions are done, work down the downsampling backlog. // We run two passes of this to ensure that the 1h downsampling is generated // for 5m downsamplings created in the first run. - level.Info(tenantLogger).Log("msg", "start first pass of downsampling") + level.Info(logger).Log("msg", "start first pass of downsampling") progress.Set(compact.SyncMeta) if err := sy.SyncMetas(ctx); err != nil { return errors.Wrap(err, "sync before first pass of downsampling") @@ -654,7 +653,7 @@ func runCompact( return errors.Wrap(err, "first pass of downsampling failed") } - level.Info(tenantLogger).Log("msg", "start second pass of downsampling") + level.Info(logger).Log("msg", "start second pass of downsampling") progress.Set(compact.SyncMeta) if err := sy.SyncMetas(ctx); err != nil { return errors.Wrap(err, "sync before second pass of downsampling") @@ -683,9 +682,9 @@ func runCompact( return errors.Wrap(err, "second pass of downsampling failed") } - level.Info(tenantLogger).Log("msg", "downsampling iterations done") + level.Info(logger).Log("msg", "downsampling iterations done") } else { - level.Info(tenantLogger).Log("msg", "downsampling was explicitly disabled") + level.Info(logger).Log("msg", "downsampling was explicitly disabled") } // TODO(bwplotka): Find a way to avoid syncing if no op was done. @@ -695,7 +694,7 @@ func runCompact( } progress.Set(compact.ApplyRetention) - if err := compact.ApplyRetentionPolicyByResolution(ctx, tenantLogger, insBkt, sy.Metas(), retentionByResolution, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, "")); err != nil { + if err := compact.ApplyRetentionPolicyByResolution(ctx, logger, insBkt, sy.Metas(), retentionByResolution, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, "")); err != nil { return errors.Wrap(err, "retention failed") } @@ -724,7 +723,7 @@ func runCompact( // for investigation. You should alert on this being halted. if compact.IsHaltError(err) { if conf.haltOnError { - level.Error(tenantLogger).Log("msg", "critical error detected; halting", "err", err) + level.Error(logger).Log("msg", "critical error detected; halting", "err", err) compactMetrics.halted.Set(1) select {} } else { @@ -735,7 +734,7 @@ func runCompact( // The RetryError signals that we hit an retriable error (transient error, no connection). // You should alert on this being triggered too frequently. 
if compact.IsRetryError(err) { - level.Error(tenantLogger).Log("msg", "retriable error", "err", err) + level.Error(logger).Log("msg", "retriable error", "err", err) compactMetrics.retried.Inc() // TODO(bplotka): use actual "retry()" here instead of waiting 5 minutes? return nil @@ -764,7 +763,7 @@ func runCompact( return logging.NoLogCall })} logMiddleware := logging.NewHTTPServerMiddleware(logger, opts...) - api.Register(r.WithPrefix("/api/v1"), tracer, tenantLogger, ins, logMiddleware) + api.Register(r.WithPrefix("/api/v1"), tracer, logger, ins, logMiddleware) // Separate fetcher for global view. // TODO(bwplotka): Allow Bucket UI to visualize the state of the block as well. @@ -772,7 +771,7 @@ func runCompact( if tenantPrefix != "" { f = baseMetaFetcher.NewMetaFetcher(nil, nil, "component", "globalBucketUI") // TODO (willh-db): revisit metrics here } else { - f = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", tenantReg), nil, "component", "globalBucketUI") + f = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", reg), nil, "component", "globalBucketUI") } f.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetGlobal(blocks, err) @@ -811,7 +810,7 @@ func runCompact( if err != nil && compact.IsRetryError(err) { // The RetryError signals that we hit an retriable error (transient error, no connection). // You should alert on this being triggered too frequently. - level.Error(tenantLogger).Log("msg", "retriable error", "err", err) + level.Error(logger).Log("msg", "retriable error", "err", err) compactMetrics.retried.Inc() return nil @@ -842,7 +841,7 @@ func runCompact( // The RetryError signals that we hit an retriable error (transient error, no connection). // You should alert on this being triggered too frequently. 
if compact.IsRetryError(err) { - level.Error(tenantLogger).Log("msg", "retriable error", "err", err) + level.Error(logger).Log("msg", "retriable error", "err", err) compactMetrics.retried.Inc() return nil From 8f3fe1cd5a74d7ae3b69d9498aa89f952742da79 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Wed, 3 Dec 2025 01:18:09 +0000 Subject: [PATCH 32/35] Changed tenantReg to reg and added baseReg --- cmd/thanos/compact.go | 73 +++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 48 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index cb16cfd837f..c96ce7be241 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -175,23 +175,23 @@ func runCompact( g *run.Group, logger log.Logger, tracer opentracing.Tracer, - reg *prometheus.Registry, + baseReg *prometheus.Registry, component component.Component, conf compactConfig, flagsMap map[string]string, ) (rerr error) { deleteDelay := time.Duration(conf.deleteDelay) - compactMetrics := newCompactMetrics(reg, deleteDelay) - progressRegistry := compact.NewProgressRegistry(reg, logger) - downsampleMetrics := newDownsampleMetrics(reg) + compactMetrics := newCompactMetrics(baseReg, deleteDelay) + progressRegistry := compact.NewProgressRegistry(baseReg, logger) + downsampleMetrics := newDownsampleMetrics(baseReg) httpProbe := prober.NewHTTP() statusProber := prober.Combine( httpProbe, - prober.NewInstrumentation(component, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)), + prober.NewInstrumentation(component, logger, extprom.WrapRegistererWithPrefix("thanos_", baseReg)), ) - srv := httpserver.New(logger, reg, component, httpProbe, + srv := httpserver.New(logger, baseReg, component, httpProbe, httpserver.WithListen(conf.http.bindAddress), httpserver.WithGracePeriod(time.Duration(conf.http.gracePeriod)), httpserver.WithTLSConfig(conf.http.tlsConfig), @@ -285,7 +285,7 @@ func runCompact( level.Info(logger).Log("msg", "running in single-tenant mode") } - overlappingCallback := compact.NewOverlappingCompactionLifecycleCallback(reg, logger, conf.enableOverlappingRemoval) + isMultiTenant := len(tenantPrefixes) > 1 // Start compaction for each tenant // Each will get its own bucket created via client.NewBucket with the appropriate prefix @@ -313,20 +313,17 @@ func runCompact( } var insBkt objstore.InstrumentedBucket - var tenantReg prometheus.Registerer + var reg prometheus.Registerer - if tenantPrefix != "" { - // For multi-tenant mode, we pass a nil registerer to avoid metric collisions - // TODO (willh-db): revisit metrics structure for multi-tenant mode - tenantReg = nil - insBkt = objstoretracing.WrapWithTraces(bkt) + if isMultiTenant { + reg = prometheus.WrapRegistererWith(prometheus.Labels{"tenant": tenantPrefix}, baseReg) } else { - tenantReg = reg - insBkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", tenantReg), bkt.Name())) + reg = baseReg } + insBkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) // Create tenant-specific logger - if tenantPrefix != "" { + if isMultiTenant { logger = log.With(logger, "tenant_prefix", tenantPrefix) } @@ -355,12 +352,7 @@ func runCompact( noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) labelShardedMetaFilter := 
block.NewLabelShardedMetaFilter(relabelConfig) - var consistencyDelayMetaFilter *block.ConsistencyDelayMetaFilter - if tenantPrefix != "" { - consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, nil) // TODO (willh-db): revisit metrics here - } else { - consistencyDelayMetaFilter = block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", tenantReg))) - } + consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", reg))) timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) var blockLister block.Lister @@ -372,12 +364,7 @@ func runCompact( default: return errors.Errorf("unknown sync strategy %s", conf.blockListStrategy) } - var baseMetaFetcher *block.BaseFetcher - if tenantPrefix != "" { - baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, nil) // TODO (willh-db): revisit metrics here - } else { - baseMetaFetcher, err = block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) - } + baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) if err != nil { return errors.Wrap(err, "create meta fetcher") } @@ -413,12 +400,7 @@ func runCompact( filters = append(filters, noDownsampleMarkerFilter) } // Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability. - var cf *block.MetaFetcher - if tenantPrefix != "" { - cf = baseMetaFetcher.NewMetaFetcher(nil, filters) // TODO (willh-db): revisit metrics here - } else { - cf = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_", reg), filters) - } + cf := baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_", reg), filters) // TODO (willh-db): revisit metrics here cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetLoaded(blocks, err) }) @@ -432,7 +414,7 @@ func runCompact( } sy, err = compact.NewMetaSyncer( logger, - tenantReg, + reg, insBkt, cf, duplicateBlocksFilter, @@ -482,7 +464,7 @@ func runCompact( // Instantiate the compactor with different time slices. Timestamps in TSDB // are in milliseconds. 
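// Illustrative sketch, not part of the patch: wrapping the shared registry with a
// constant "tenant" label (as above) lets every tenant iteration register the same
// metric names without collisions. The metric name here is hypothetical.
package main

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

func main() {
	baseReg := prometheus.NewRegistry()
	for _, tenant := range []string{"tenant-a", "tenant-b"} {
		reg := prometheus.WrapRegistererWith(prometheus.Labels{"tenant": tenant}, baseReg)
		// Registers thanos_compact_example_iterations_total{tenant="..."} once per tenant.
		promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Name: "thanos_compact_example_iterations_total",
			Help: "Example counter registered once per tenant.",
		}).Inc()
	}
}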
- comp, err := tsdb.NewLeveledCompactor(ctx, tenantReg, logger, levels, downsample.NewPool(), mergeFunc) + comp, err := tsdb.NewLeveledCompactor(ctx, reg, logger, levels, downsample.NewPool(), mergeFunc) if err != nil { return errors.Wrap(err, "create compactor") } @@ -505,7 +487,7 @@ func runCompact( insBkt, conf.acceptMalformedIndex, enableVerticalCompaction, - tenantReg, + reg, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), compactMetrics.garbageCollectedBlocks, compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason), @@ -535,7 +517,7 @@ func runCompact( planner, comp, compact.DefaultBlockDeletableChecker{}, - overlappingCallback, + compact.NewOverlappingCompactionLifecycleCallback(baseReg, logger, conf.enableOverlappingRemoval), compactDir, insBkt, conf.compactionConcurrency, @@ -753,7 +735,7 @@ func runCompact( if isFirstTenant && !conf.disableWeb { r := route.New() - ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) + ins := extpromhttp.NewInstrumentationMiddleware(baseReg, nil) global := ui.NewBucketUI(logger, conf.webConf.externalPrefix, conf.webConf.prefixHeaderName, component) global.Register(r, ins) @@ -767,12 +749,7 @@ func runCompact( // Separate fetcher for global view. // TODO(bwplotka): Allow Bucket UI to visualize the state of the block as well. - var f *block.MetaFetcher - if tenantPrefix != "" { - f = baseMetaFetcher.NewMetaFetcher(nil, nil, "component", "globalBucketUI") // TODO (willh-db): revisit metrics here - } else { - f = baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", reg), nil, "component", "globalBucketUI") - } + f := baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", baseReg), nil, "component", "globalBucketUI") f.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetGlobal(blocks, err) }) @@ -826,11 +803,11 @@ func runCompact( // Periodically calculate the progress of compaction, downsampling and retention. 
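// Illustrative sketch, not part of the patch: the isFirstTenant guard used above,
// assuming it is derived from the tenant loop index, so process-wide pieces (web UI,
// progress calculators) are wired once while per-tenant pieces run on every pass.
package main

import "fmt"

func main() {
	tenantPrefixes := []string{"v1/raw/tenant-a", "v1/raw/tenant-b"}
	for i, tenantPrefix := range tenantPrefixes {
		isFirstTenant := i == 0
		if isFirstTenant {
			fmt.Println("registering web UI and progress calculators once")
		}
		fmt.Println("running compaction for", tenantPrefix)
	}
}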
if isFirstTenant && conf.progressCalculateInterval > 0 { g.Add(func() error { - ps := compact.NewCompactionProgressCalculator(reg, tsdbPlanner) - rs := compact.NewRetentionProgressCalculator(reg, retentionByResolution) + ps := compact.NewCompactionProgressCalculator(baseReg, tsdbPlanner) + rs := compact.NewRetentionProgressCalculator(baseReg, retentionByResolution) var ds *compact.DownsampleProgressCalculator if !conf.disableDownsampling { - ds = compact.NewDownsampleProgressCalculator(reg) + ds = compact.NewDownsampleProgressCalculator(baseReg) } return runutil.Repeat(conf.progressCalculateInterval, ctx.Done(), func() error { From 61bfc8c799b2af18c700f5d0d091596c24742067 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Wed, 3 Dec 2025 01:21:44 +0000 Subject: [PATCH 33/35] minor formatting --- cmd/thanos/compact.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index c96ce7be241..275b1a3c12b 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -312,7 +312,6 @@ func runCompact( return err } - var insBkt objstore.InstrumentedBucket var reg prometheus.Registerer if isMultiTenant { @@ -320,7 +319,7 @@ func runCompact( } else { reg = baseReg } - insBkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) + insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) // Create tenant-specific logger if isMultiTenant { @@ -352,7 +351,7 @@ func runCompact( noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig) - consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, (extprom.WrapRegistererWithPrefix("thanos_", reg))) + consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, extprom.WrapRegistererWithPrefix("thanos_", reg)) timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) var blockLister block.Lister @@ -400,7 +399,8 @@ func runCompact( filters = append(filters, noDownsampleMarkerFilter) } // Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability. 
- cf := baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_", reg), filters) // TODO (willh-db): revisit metrics here + cf := baseMetaFetcher.NewMetaFetcher( + extprom.WrapRegistererWithPrefix("thanos_", reg), filters) cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { api.SetLoaded(blocks, err) }) From 533913991b46116c60dca2f78155153119619ee7 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 8 Dec 2025 20:42:06 +0000 Subject: [PATCH 34/35] Refactored into several functions --- cmd/thanos/compact.go | 1028 ++++++++++++++++++++++------------------- 1 file changed, 553 insertions(+), 475 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 275b1a3c12b..a91945b24a7 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -24,6 +24,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" "github.com/prometheus/common/route" + "github.com/prometheus/prometheus/model/relabel" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb" "gopkg.in/yaml.v2" @@ -175,23 +176,25 @@ func runCompact( g *run.Group, logger log.Logger, tracer opentracing.Tracer, - baseReg *prometheus.Registry, + reg *prometheus.Registry, component component.Component, conf compactConfig, flagsMap map[string]string, ) (rerr error) { deleteDelay := time.Duration(conf.deleteDelay) - compactMetrics := newCompactMetrics(baseReg, deleteDelay) - progressRegistry := compact.NewProgressRegistry(baseReg, logger) - downsampleMetrics := newDownsampleMetrics(baseReg) + compactMetrics := newCompactMetrics(reg, deleteDelay) + progressRegistry := compact.NewProgressRegistry(reg, logger) + downsampleMetrics := newDownsampleMetrics(reg) + + ctx := context.Background() httpProbe := prober.NewHTTP() statusProber := prober.Combine( httpProbe, - prober.NewInstrumentation(component, logger, extprom.WrapRegistererWithPrefix("thanos_", baseReg)), + prober.NewInstrumentation(component, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)), ) - srv := httpserver.New(logger, baseReg, component, httpProbe, + srv := httpserver.New(logger, reg, component, httpProbe, httpserver.WithListen(conf.http.bindAddress), httpserver.WithGracePeriod(time.Duration(conf.http.gracePeriod)), httpserver.WithTLSConfig(conf.http.tlsConfig), @@ -220,8 +223,10 @@ func runCompact( // Setup tenant partitioning if enabled var tenantPrefixes []string + var isMultiTenant bool if conf.enableTenantPathPrefix { + isMultiTenant = true hostname := os.Getenv("HOSTNAME") ordinal, err := extractOrdinalFromHostname(hostname) @@ -257,7 +262,6 @@ func runCompact( "common_path_prefix", conf.commonPathPrefix) // Get tenant assignments for this shard - ctx := context.Background() tenantAssignments, err := compact.SetupTenantPartitioning(ctx, discoveryBkt, logger, tenantWeightsPath, conf.commonPathPrefix, totalShards) if err != nil { return errors.Wrap(err, "failed to setup tenant partitioning") @@ -281,11 +285,129 @@ func runCompact( "tenants", strings.Join(assignedTenants, ",")) } else { // Single-tenant mode + isMultiTenant = false tenantPrefixes = []string{""} level.Info(logger).Log("msg", "running in single-tenant mode") } - isMultiTenant := len(tenantPrefixes) > 1 + relabelContentYaml, err := conf.selectorRelabelConf.Content() + if err != nil { + return errors.Wrap(err, "get content of relabel configuration") + } + relabelConfig, err := block.ParseRelabelConfig(relabelContentYaml, block.SelectorSupportedRelabelActions) + if err != 
nil { + return err + } + + retentionByResolution := map[compact.ResolutionLevel]time.Duration{ + compact.ResolutionLevelRaw: time.Duration(conf.retentionRaw), + compact.ResolutionLevel5m: time.Duration(conf.retentionFiveMin), + compact.ResolutionLevel1h: time.Duration(conf.retentionOneHr), + } + + if retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() != 0 { + if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() < downsample.ResLevel1DownsampleRange { + return errors.New("raw resolution must be higher than the minimum block size after which 5m resolution downsampling will occur (40 hours)") + } + level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw]) + } + if retentionByResolution[compact.ResolutionLevel5m].Milliseconds() != 0 { + if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Milliseconds() < downsample.ResLevel2DownsampleRange { + return errors.New("5m resolution retention must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") + } + level.Info(logger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m]) + } + if retentionByResolution[compact.ResolutionLevel1h].Milliseconds() != 0 { + level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) + } + + levels, err := compactions.levels(conf.maxCompactionLevel) + if err != nil { + return errors.Wrap(err, "get compaction levels") + } + if conf.maxCompactionLevel < compactions.maxLevel() { + level.Warn(logger).Log("msg", "Max compaction level is lower than should be", "current", conf.maxCompactionLevel, "default", compactions.maxLevel()) + } + + dedupReplicaLabels := strutil.ParseFlagLabels(conf.dedupReplicaLabels) + enableVerticalCompaction := conf.enableVerticalCompaction + if len(dedupReplicaLabels) > 0 { + enableVerticalCompaction = true + level.Info(logger).Log( + "msg", "deduplication.replica-label specified, enabling vertical compaction", "dedupReplicaLabels", strings.Join(dedupReplicaLabels, ","), + ) + } + if enableVerticalCompaction { + level.Info(logger).Log( + "msg", "vertical compaction is enabled", "compact.enable-vertical-compaction", fmt.Sprintf("%v", conf.enableVerticalCompaction), + ) + } + + var mergeFunc storage.VerticalChunkSeriesMergeFunc + switch conf.dedupFunc { + case compact.DedupAlgorithmPenalty: + mergeFunc = dedup.NewChunkSeriesMerger() + if len(dedupReplicaLabels) == 0 { + return errors.New("penalty based deduplication needs at least one replica label specified") + } + case "": + mergeFunc = storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge) + default: + return errors.Errorf("unsupported deduplication func, got %s", conf.dedupFunc) + } + + compactDir := path.Join(conf.dataDir, "compact") + downsamplingDir := path.Join(conf.dataDir, "downsample") + if err := os.MkdirAll(compactDir, os.ModePerm); err != nil { + return errors.Wrap(err, "create working compact directory") + } + if err := os.MkdirAll(downsamplingDir, os.ModePerm); err != nil { + return errors.Wrap(err, "create working downsample directory") + } + + // Create a global bucket for the web UI (without tenant prefix) + globalBkt, err := client.NewBucket(logger, confContentYaml, component.String(), nil) + if err != nil { + return errors.Wrap(err, 
"create global bucket") + } + globalInsBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(globalBkt, extprom.WrapRegistererWithPrefix("thanos_", reg), globalBkt.Name())) + + defer func() { + if rerr != nil { + runutil.CloseWithLogOnErr(logger, globalInsBkt, "global bucket client") + } + }() + + // Create block lister based on strategy + var blockLister block.Lister + switch conf.blockListStrategy { + case string(concurrentDiscovery): + blockLister = block.NewConcurrentLister(logger, globalInsBkt) + case string(recursiveDiscovery): + blockLister = block.NewRecursiveLister(logger, globalInsBkt) + default: + blockLister = block.NewConcurrentLister(logger, globalInsBkt) + } + + // Create base meta fetcher for global view (web UI) + baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, globalInsBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) + if err != nil { + return errors.Wrap(err, "create base meta fetcher") + } + + // Create global API for web UI + globalAPI := blocksAPI.NewBlocksAPI(logger, conf.webConf.disableCORS, conf.label, flagsMap, globalInsBkt) + + // Create context with cancel for the entire compactor + ctx, cancel := context.WithCancel(ctx) + ctx = tracing.ContextWithTracer(ctx, tracer) + ctx = objstoretracing.ContextWithTracer(ctx, tracer) + + defer func() { + if rerr != nil { + cancel() + } + }() // Start compaction for each tenant // Each will get its own bucket created via client.NewBucket with the appropriate prefix @@ -312,28 +434,21 @@ func runCompact( return err } - var reg prometheus.Registerer + var tenantReg prometheus.Registerer if isMultiTenant { - reg = prometheus.WrapRegistererWith(prometheus.Labels{"tenant": tenantPrefix}, baseReg) + tenantReg = prometheus.WrapRegistererWith(prometheus.Labels{"tenant": tenantPrefix}, reg) } else { - reg = baseReg + tenantReg = reg } insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) // Create tenant-specific logger + var tenantLogger log.Logger if isMultiTenant { - logger = log.With(logger, "tenant_prefix", tenantPrefix) - } - - relabelContentYaml, err := conf.selectorRelabelConf.Content() - if err != nil { - return errors.Wrap(err, "get content of relabel configuration") - } - - relabelConfig, err := block.ParseRelabelConfig(relabelContentYaml, block.SelectorSupportedRelabelActions) - if err != nil { - return err + tenantLogger = log.With(logger, "tenant_prefix", tenantPrefix) + } else { + tenantLogger = logger } // Ensure we close up everything properly. @@ -343,536 +458,499 @@ func runCompact( } }() - // While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter. - // The delay of deleteDelay/2 is added to ensure we fetch blocks that are meant to be deleted but do not have a replacement yet. - // This is to make sure compactor will not accidentally perform compactions with gap instead. 
- ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(logger, insBkt, deleteDelay/2, conf.blockMetaFetchConcurrency) - duplicateBlocksFilter := block.NewDeduplicateFilter(conf.blockMetaFetchConcurrency) - noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) - noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) - labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig) - consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, extprom.WrapRegistererWithPrefix("thanos_", reg)) - timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) - - var blockLister block.Lister - switch syncStrategy(conf.blockListStrategy) { - case concurrentDiscovery: - blockLister = block.NewConcurrentLister(logger, insBkt) - case recursiveDiscovery: - blockLister = block.NewRecursiveLister(logger, insBkt) - default: - return errors.Errorf("unknown sync strategy %s", conf.blockListStrategy) - } - baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, blockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) - if err != nil { - return errors.Wrap(err, "create meta fetcher") - } - - enableVerticalCompaction := conf.enableVerticalCompaction - dedupReplicaLabels := strutil.ParseFlagLabels(conf.dedupReplicaLabels) - if len(dedupReplicaLabels) > 0 { - enableVerticalCompaction = true - level.Info(logger).Log( - "msg", "deduplication.replica-label specified, enabling vertical compaction", "dedupReplicaLabels", strings.Join(dedupReplicaLabels, ","), - ) - } - if enableVerticalCompaction { - level.Info(logger).Log( - "msg", "vertical compaction is enabled", "compact.enable-vertical-compaction", fmt.Sprintf("%v", conf.enableVerticalCompaction), - ) - } - var ( - api = blocksAPI.NewBlocksAPI(logger, conf.webConf.disableCORS, conf.label, flagsMap, insBkt) - sy *compact.Syncer - ) - { - filters := []block.MetadataFilter{ - timePartitionMetaFilter, - labelShardedMetaFilter, - consistencyDelayMetaFilter, - ignoreDeletionMarkFilter, - block.NewReplicaLabelRemover(logger, dedupReplicaLabels), - duplicateBlocksFilter, - noCompactMarkerFilter, - } - if !conf.disableDownsampling { - filters = append(filters, noDownsampleMarkerFilter) - } - // Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability. 
- cf := baseMetaFetcher.NewMetaFetcher( - extprom.WrapRegistererWithPrefix("thanos_", reg), filters) - cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { - api.SetLoaded(blocks, err) - }) - - // Still use blockViewerSyncBlockTimeout to retain original behavior before this upstream change: - // https://github.com/databricks/thanos/commit/ab43b2b20cb42eca2668824a4084307216c6da2e#diff-6c2257b871fd1196514f664bc7e44cb21681215e0929710d0ad5ceea90b8e122R294 - // Otherwise Azure won't work due to its high latency - var syncMetasTimeout = conf.blockViewerSyncBlockTimeout - if !conf.wait { - syncMetasTimeout = 0 - } - sy, err = compact.NewMetaSyncer( - logger, - reg, - insBkt, - cf, - duplicateBlocksFilter, - ignoreDeletionMarkFilter, - compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), - compactMetrics.garbageCollectedBlocks, - syncMetasTimeout, - ) - if err != nil { - return errors.Wrap(err, "create syncer") - } - } - - levels, err := compactions.levels(conf.maxCompactionLevel) - if err != nil { - return errors.Wrap(err, "get compaction levels") - } - - if conf.maxCompactionLevel < compactions.maxLevel() { - level.Warn(logger).Log("msg", "Max compaction level is lower than should be", "current", conf.maxCompactionLevel, "default", compactions.maxLevel()) - } - - ctx, cancel := context.WithCancel(context.Background()) - ctx = tracing.ContextWithTracer(ctx, tracer) - ctx = objstoretracing.ContextWithTracer(ctx, tracer) // objstore tracing uses a different tracer key in context. + runCompactForTenant(g, ctx, tenantLogger, tracer, tenantReg, reg, component, conf, flagsMap, tenantPrefix, deleteDelay, + compactMetrics, progressRegistry, downsampleMetrics, insBkt, relabelConfig, + retentionByResolution, levels, dedupReplicaLabels, enableVerticalCompaction, mergeFunc, + compactDir, downsamplingDir, cancel) + } - defer func() { - if rerr != nil { - cancel() - } - }() + postProcess(g, ctx, logger, tracer, srv, conf, reg, component, progressRegistry, compactMetrics, baseMetaFetcher, globalAPI, statusProber, cancel) - var mergeFunc storage.VerticalChunkSeriesMergeFunc - switch conf.dedupFunc { - case compact.DedupAlgorithmPenalty: - mergeFunc = dedup.NewChunkSeriesMerger() + level.Info(logger).Log("msg", "starting compact node") + statusProber.Ready() + return nil +} - if len(dedupReplicaLabels) == 0 { - return errors.New("penalty based deduplication needs at least one replica label specified") - } - case "": - mergeFunc = storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge) +func runCompactForTenant( + g *run.Group, + ctx context.Context, + logger log.Logger, + tracer opentracing.Tracer, + reg prometheus.Registerer, + baseReg *prometheus.Registry, + component component.Component, + conf compactConfig, + flagsMap map[string]string, + tenantPrefix string, + deleteDelay time.Duration, + compactMetrics *compactMetrics, + progressRegistry *compact.ProgressRegistry, + downsampleMetrics *DownsampleMetrics, + insBkt objstore.InstrumentedBucket, + relabelConfig []*relabel.Config, + retentionByResolution map[compact.ResolutionLevel]time.Duration, + levels []int64, + dedupReplicaLabels []string, + enableVerticalCompaction bool, + mergeFunc storage.VerticalChunkSeriesMergeFunc, + compactDir string, + downsamplingDir string, + cancel context.CancelFunc, +) (rerr error) { + // While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter. 
+ // The delay of deleteDelay/2 is added to ensure we fetch blocks that are meant to be deleted but do not have a replacement yet. + // This is to make sure compactor will not accidentally perform compactions with gap instead. + ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(logger, insBkt, deleteDelay/2, conf.blockMetaFetchConcurrency) + duplicateBlocksFilter := block.NewDeduplicateFilter(conf.blockMetaFetchConcurrency) + noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) + noDownsampleMarkerFilter := downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency) + labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig) + consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, extprom.WrapRegistererWithPrefix("thanos_", reg)) + timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime) + // Create tenant-specific block lister and base fetcher + var tenantBlockLister block.Lister + switch conf.blockListStrategy { + case string(concurrentDiscovery): + tenantBlockLister = block.NewConcurrentLister(logger, insBkt) + case string(recursiveDiscovery): + tenantBlockLister = block.NewRecursiveLister(logger, insBkt) + default: + tenantBlockLister = block.NewConcurrentLister(logger, insBkt) + } - default: - return errors.Errorf("unsupported deduplication func, got %s", conf.dedupFunc) - } + tenantBaseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, tenantBlockLister, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg)) + if err != nil { + return errors.Wrap(err, "create tenant base meta fetcher") + } - // Instantiate the compactor with different time slices. Timestamps in TSDB - // are in milliseconds. - comp, err := tsdb.NewLeveledCompactor(ctx, reg, logger, levels, downsample.NewPool(), mergeFunc) - if err != nil { - return errors.Wrap(err, "create compactor") + var ( + tenantAPI = blocksAPI.NewBlocksAPI(logger, conf.webConf.disableCORS, conf.label, flagsMap, insBkt) + sy *compact.Syncer + ) + { + filters := []block.MetadataFilter{ + timePartitionMetaFilter, + labelShardedMetaFilter, + consistencyDelayMetaFilter, + ignoreDeletionMarkFilter, + block.NewReplicaLabelRemover(logger, dedupReplicaLabels), + duplicateBlocksFilter, + noCompactMarkerFilter, } - - var ( - compactDir = path.Join(conf.dataDir, "compact") - downsamplingDir = path.Join(conf.dataDir, "downsample") - ) - - if err := os.MkdirAll(compactDir, os.ModePerm); err != nil { - return errors.Wrap(err, "create working compact directory") + if !conf.disableDownsampling { + filters = append(filters, noDownsampleMarkerFilter) } + // Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability. 
+ cf := tenantBaseMetaFetcher.NewMetaFetcher( + extprom.WrapRegistererWithPrefix("thanos_", reg), filters) + cf.UpdateOnChange(func(blocks []metadata.Meta, err error) { + tenantAPI.SetLoaded(blocks, err) + }) - if err := os.MkdirAll(downsamplingDir, os.ModePerm); err != nil { - return errors.Wrap(err, "create working downsample directory") + // Still use blockViewerSyncBlockTimeout to retain original behavior before this upstream change: + // https://github.com/databricks/thanos/commit/ab43b2b20cb42eca2668824a4084307216c6da2e#diff-6c2257b871fd1196514f664bc7e44cb21681215e0929710d0ad5ceea90b8e122R294 + // Otherwise Azure won't work due to its high latency + var syncMetasTimeout = conf.blockViewerSyncBlockTimeout + if !conf.wait { + syncMetasTimeout = 0 } - - grouper := compact.NewDefaultGrouper( + var syErr error + sy, syErr = compact.NewMetaSyncer( logger, - insBkt, - conf.acceptMalformedIndex, - enableVerticalCompaction, reg, + insBkt, + cf, + duplicateBlocksFilter, + ignoreDeletionMarkFilter, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), compactMetrics.garbageCollectedBlocks, - compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason), - metadata.HashFunc(conf.hashFunc), - conf.blockFilesConcurrency, - conf.compactBlocksFetchConcurrency, - ) - var planner compact.Planner - - tsdbPlanner := compact.NewPlanner(logger, levels, noCompactMarkerFilter) - largeIndexFilterPlanner := compact.WithLargeTotalIndexSizeFilter( - tsdbPlanner, - insBkt, - int64(conf.maxBlockIndexSize), - compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.IndexSizeExceedingNoCompactReason), - ) - if enableVerticalCompaction { - planner = compact.WithVerticalCompactionDownsampleFilter(largeIndexFilterPlanner, insBkt, compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.DownsampleVerticalCompactionNoCompactReason)) - } else { - planner = largeIndexFilterPlanner - } - blocksCleaner := compact.NewBlocksCleaner(logger, insBkt, ignoreDeletionMarkFilter, deleteDelay, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) - compactor, err := compact.NewBucketCompactorWithCheckerAndCallback( - logger, - sy, - grouper, - planner, - comp, - compact.DefaultBlockDeletableChecker{}, - compact.NewOverlappingCompactionLifecycleCallback(baseReg, logger, conf.enableOverlappingRemoval), - compactDir, - insBkt, - conf.compactionConcurrency, - conf.skipBlockWithOutOfOrderChunks, + syncMetasTimeout, ) - if err != nil { - return errors.Wrap(err, "create bucket compactor") + if syErr != nil { + return errors.Wrap(syErr, "create syncer") } + } - retentionByResolution := map[compact.ResolutionLevel]time.Duration{ - compact.ResolutionLevelRaw: time.Duration(conf.retentionRaw), - compact.ResolutionLevel5m: time.Duration(conf.retentionFiveMin), - compact.ResolutionLevel1h: time.Duration(conf.retentionOneHr), + // Instantiate the compactor with different time slices. Timestamps in TSDB + // are in milliseconds. 
+ comp, err := tsdb.NewLeveledCompactor(ctx, reg, logger, levels, downsample.NewPool(), mergeFunc) + if err != nil { + return errors.Wrap(err, "create compactor") + } + + grouper := compact.NewDefaultGrouper( + logger, + insBkt, + conf.acceptMalformedIndex, + enableVerticalCompaction, + reg, + compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""), + compactMetrics.garbageCollectedBlocks, + compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason), + metadata.HashFunc(conf.hashFunc), + conf.blockFilesConcurrency, + conf.compactBlocksFetchConcurrency, + ) + var planner compact.Planner + + tsdbPlanner := compact.NewPlanner(logger, levels, noCompactMarkerFilter) + largeIndexFilterPlanner := compact.WithLargeTotalIndexSizeFilter( + tsdbPlanner, + insBkt, + int64(conf.maxBlockIndexSize), + compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.IndexSizeExceedingNoCompactReason), + ) + if enableVerticalCompaction { + planner = compact.WithVerticalCompactionDownsampleFilter(largeIndexFilterPlanner, insBkt, compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.DownsampleVerticalCompactionNoCompactReason)) + } else { + planner = largeIndexFilterPlanner + } + blocksCleaner := compact.NewBlocksCleaner(logger, insBkt, ignoreDeletionMarkFilter, deleteDelay, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) + compactor, err := compact.NewBucketCompactorWithCheckerAndCallback( + logger, + sy, + grouper, + planner, + comp, + compact.DefaultBlockDeletableChecker{}, + compact.NewOverlappingCompactionLifecycleCallback(baseReg, logger, conf.enableOverlappingRemoval), + compactDir, + insBkt, + conf.compactionConcurrency, + conf.skipBlockWithOutOfOrderChunks, + ) + if err != nil { + return errors.Wrap(err, "create bucket compactor") + } + + retentionByTenant, err := compact.ParesRetentionPolicyByTenant(logger, *conf.retentionTenants) + if err != nil { + level.Error(logger).Log("msg", "failed to parse retention policy by tenant", "err", err) + return err + } + + var cleanMtx sync.Mutex + // TODO(GiedriusS): we could also apply retention policies here but the logic would be a bit more complex. 
+ cleanPartialMarked := func(progress *compact.Progress) error { + cleanMtx.Lock() + defer cleanMtx.Unlock() + defer progress.Idle() + progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + return errors.Wrap(err, "syncing metas") } - if retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() != 0 { - // If downsampling is enabled, error if raw retention is not sufficient for downsampling to occur (upper bound 10 days for 1h resolution) - if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() < downsample.ResLevel1DownsampleRange { - return errors.New("raw resolution must be higher than the minimum block size after which 5m resolution downsampling will occur (40 hours)") - } - level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw]) + progress.Set(compact.CleanBlocks) + compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), insBkt, compactMetrics.partialUploadDeleteAttempts, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) + if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil { + return errors.Wrap(err, "cleaning marked blocks") } - if retentionByResolution[compact.ResolutionLevel5m].Milliseconds() != 0 { - // If retention is lower than minimum downsample range, then no downsampling at this resolution will be persisted - if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Milliseconds() < downsample.ResLevel2DownsampleRange { - return errors.New("5m resolution retention must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") + compactMetrics.cleanups.Inc() + + return nil + } + + compactMainFn := func(progress *compact.Progress) error { + defer progress.Idle() + // this should happen before any compaction to remove unnecessary process on backlogs beyond retention. + if len(retentionByTenant) != 0 && len(sy.Metas()) == 0 { + level.Info(logger).Log("msg", "sync before tenant retention due to no blocks") + progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + return errors.Wrap(err, "sync before tenant retention") } - level.Info(logger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m]) } - if retentionByResolution[compact.ResolutionLevel1h].Milliseconds() != 0 { - level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) + + progress.Set(compact.ApplyRetention) + if err := compact.ApplyRetentionPolicyByTenant(ctx, logger, insBkt, sy.Metas(), retentionByTenant, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, metadata.TenantRetentionExpired)); err != nil { + return errors.Wrap(err, "retention by tenant failed") } - retentionByTenant, err := compact.ParesRetentionPolicyByTenant(logger, *conf.retentionTenants) - if err != nil { - level.Error(logger).Log("msg", "failed to parse retention policy by tenant", "err", err) - return err + if err := compactor.Compact(ctx, progress); err != nil { + return errors.Wrap(err, "whole compaction error") } - var cleanMtx sync.Mutex - // TODO(GiedriusS): we could also apply retention policies here but the logic would be a bit more complex. 
- cleanPartialMarked := func(progress *compact.Progress) error { - cleanMtx.Lock() - defer cleanMtx.Unlock() - defer progress.Idle() + if !conf.disableDownsampling { + // After all compactions are done, work down the downsampling backlog. + // We run two passes of this to ensure that the 1h downsampling is generated + // for 5m downsamplings created in the first run. + level.Info(logger).Log("msg", "start first pass of downsampling") progress.Set(compact.SyncMeta) if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "syncing metas") + return errors.Wrap(err, "sync before first pass of downsampling") + } + progress.Set(compact.DownSampling) + filteredMetas := sy.Metas() + noDownsampleBlocks := noDownsampleMarkerFilter.NoDownsampleMarkedBlocks() + for ul := range noDownsampleBlocks { + delete(filteredMetas, ul) } - progress.Set(compact.CleanBlocks) - compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), insBkt, compactMetrics.partialUploadDeleteAttempts, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures) - if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil { - return errors.Wrap(err, "cleaning marked blocks") + for _, meta := range filteredMetas { + resolutionLabel := meta.Thanos.ResolutionString() + downsampleMetrics.downsamples.WithLabelValues(resolutionLabel) + downsampleMetrics.downsampleFailures.WithLabelValues(resolutionLabel) } - compactMetrics.cleanups.Inc() - return nil - } + if err := downsampleBucket( + ctx, + logger, + downsampleMetrics, + insBkt, + filteredMetas, + downsamplingDir, + conf.downsampleConcurrency, + conf.blockFilesConcurrency, + metadata.HashFunc(conf.hashFunc), + conf.acceptMalformedIndex, + ); err != nil { + return errors.Wrap(err, "first pass of downsampling failed") + } - compactMainFn := func(progress *compact.Progress) error { - defer progress.Idle() - // this should happen before any compaction to remove unnecessary process on backlogs beyond retention. - if len(retentionByTenant) != 0 && len(sy.Metas()) == 0 { - level.Info(logger).Log("msg", "sync before tenant retention due to no blocks") - progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "sync before tenant retention") - } + level.Info(logger).Log("msg", "start second pass of downsampling") + progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + return errors.Wrap(err, "sync before second pass of downsampling") } - progress.Set(compact.ApplyRetention) - if err := compact.ApplyRetentionPolicyByTenant(ctx, logger, insBkt, sy.Metas(), retentionByTenant, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, metadata.TenantRetentionExpired)); err != nil { - return errors.Wrap(err, "retention by tenant failed") + // Regenerate the filtered list of blocks after the sync, + // to include the blocks created by the first pass. 
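Editor's note: the bare `downsampleMetrics.downsamples.WithLabelValues(resolutionLabel)` calls in the loop above (no `Inc`) appear to exist only to pre-create the per-resolution counter children so they are exported as 0 before any downsampling runs. A standalone sketch of that client_golang idiom; the metric name and label values are illustrative:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	reg := prometheus.NewRegistry()
	downsamples := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "example_downsample_total", // illustrative name
		Help: "Downsampling attempts per source resolution.",
	}, []string{"resolution"})
	reg.MustRegister(downsamples)

	// Calling WithLabelValues creates the child series immediately, so a "0"
	// sample shows up in /metrics even before the first increment.
	for _, resolution := range []string{"0s", "5m0s"} {
		downsamples.WithLabelValues(resolution)
	}

	mfs, _ := reg.Gather()
	for _, mf := range mfs {
		for _, m := range mf.GetMetric() {
			fmt.Printf("%s{resolution=%q} %v\n", mf.GetName(), m.GetLabel()[0].GetValue(), m.GetCounter().GetValue())
		}
	}
}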
+ filteredMetas = sy.Metas() + noDownsampleBlocks = noDownsampleMarkerFilter.NoDownsampleMarkedBlocks() + for ul := range noDownsampleBlocks { + delete(filteredMetas, ul) } - if err := compactor.Compact(ctx, progress); err != nil { - return errors.Wrap(err, "whole compaction error") + if err := downsampleBucket( + ctx, + logger, + downsampleMetrics, + insBkt, + filteredMetas, + downsamplingDir, + conf.downsampleConcurrency, + conf.blockFilesConcurrency, + metadata.HashFunc(conf.hashFunc), + conf.acceptMalformedIndex, + ); err != nil { + return errors.Wrap(err, "second pass of downsampling failed") } - if !conf.disableDownsampling { - // After all compactions are done, work down the downsampling backlog. - // We run two passes of this to ensure that the 1h downsampling is generated - // for 5m downsamplings created in the first run. - level.Info(logger).Log("msg", "start first pass of downsampling") - progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "sync before first pass of downsampling") - } - progress.Set(compact.DownSampling) - filteredMetas := sy.Metas() - noDownsampleBlocks := noDownsampleMarkerFilter.NoDownsampleMarkedBlocks() - for ul := range noDownsampleBlocks { - delete(filteredMetas, ul) - } + level.Info(logger).Log("msg", "downsampling iterations done") + } else { + level.Info(logger).Log("msg", "downsampling was explicitly disabled") + } - for _, meta := range filteredMetas { - resolutionLabel := meta.Thanos.ResolutionString() - downsampleMetrics.downsamples.WithLabelValues(resolutionLabel) - downsampleMetrics.downsampleFailures.WithLabelValues(resolutionLabel) - } + // TODO(bwplotka): Find a way to avoid syncing if no op was done. + progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + return errors.Wrap(err, "sync before retention") + } - if err := downsampleBucket( - ctx, - logger, - downsampleMetrics, - insBkt, - filteredMetas, - downsamplingDir, - conf.downsampleConcurrency, - conf.blockFilesConcurrency, - metadata.HashFunc(conf.hashFunc), - conf.acceptMalformedIndex, - ); err != nil { - return errors.Wrap(err, "first pass of downsampling failed") - } + progress.Set(compact.ApplyRetention) + if err := compact.ApplyRetentionPolicyByResolution(ctx, logger, insBkt, sy.Metas(), retentionByResolution, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, "")); err != nil { + return errors.Wrap(err, "retention failed") + } - level.Info(logger).Log("msg", "start second pass of downsampling") - progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "sync before second pass of downsampling") - } + return cleanPartialMarked(progress) + } - // Regenerate the filtered list of blocks after the sync, - // to include the blocks created by the first pass. 
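Editor's note: the two downsampling passes above exist because 1h blocks are built from 5m blocks; the first pass may create fresh 5m blocks, and only after re-syncing metadata can the second pass turn those into 1h blocks. A compressed model of that flow, with a hypothetical helper standing in for SyncMetas/downsampleBucket and block IDs reduced to strings:

package main

import "fmt"

// downsampleOnce creates the next resolution for every block that does not yet
// have a downsampled counterpart. Hypothetical stand-in for downsampleBucket.
func downsampleOnce(metas map[string]string) map[string]string {
	next := map[string]string{"raw": "5m", "5m": "1h"}
	created := map[string]string{}
	for id, res := range metas {
		target, ok := next[res]
		if !ok {
			continue
		}
		childID := id + "-" + target
		if _, exists := metas[childID]; exists {
			continue
		}
		created[childID] = target
	}
	return created
}

func main() {
	metas := map[string]string{"01A": "raw"} // block ID -> resolution
	for pass := 1; pass <= 2; pass++ {
		// The meta "sync" between passes picks up blocks created by the previous pass,
		// so pass 2 can build the 1h block from the 5m block made in pass 1.
		for id, res := range downsampleOnce(metas) {
			metas[id] = res
		}
		fmt.Println("after pass", pass, metas)
	}
}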
- filteredMetas = sy.Metas() - noDownsampleBlocks = noDownsampleMarkerFilter.NoDownsampleMarkedBlocks() - for ul := range noDownsampleBlocks { - delete(filteredMetas, ul) - } + // For backwards compatibility, use the existing single-goroutine approach + // The bucketsToCompact[0] is already assigned to insBkt above, and all setup used insBkt + // So the existing compactMainFn will work for the first/only bucket + g.Add(func() error { + defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") - if err := downsampleBucket( - ctx, - logger, - downsampleMetrics, - insBkt, - filteredMetas, - downsamplingDir, - conf.downsampleConcurrency, - conf.blockFilesConcurrency, - metadata.HashFunc(conf.hashFunc), - conf.acceptMalformedIndex, - ); err != nil { - return errors.Wrap(err, "second pass of downsampling failed") - } + if !conf.wait { + return compactMainFn(progressRegistry.Get(compact.Main)) + } - level.Info(logger).Log("msg", "downsampling iterations done") - } else { - level.Info(logger).Log("msg", "downsampling was explicitly disabled") + // --wait=true is specified. + return runutil.Repeat(conf.waitInterval, ctx.Done(), func() error { + err := compactMainFn(progressRegistry.Get(compact.Main)) + if err == nil { + compactMetrics.iterations.Inc() + return nil } - // TODO(bwplotka): Find a way to avoid syncing if no op was done. - progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - return errors.Wrap(err, "sync before retention") + // The HaltError type signals that we hit a critical bug and should block + // for investigation. You should alert on this being halted. + if compact.IsHaltError(err) { + if conf.haltOnError { + level.Error(logger).Log("msg", "critical error detected; halting", "err", err) + compactMetrics.halted.Set(1) + select {} + } else { + return errors.Wrap(err, "critical error detected") + } } - progress.Set(compact.ApplyRetention) - if err := compact.ApplyRetentionPolicyByResolution(ctx, logger, insBkt, sy.Metas(), retentionByResolution, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, "")); err != nil { - return errors.Wrap(err, "retention failed") + // The RetryError signals that we hit an retriable error (transient error, no connection). + // You should alert on this being triggered too frequently. + if compact.IsRetryError(err) { + level.Error(logger).Log("msg", "retriable error", "err", err) + compactMetrics.retried.Inc() + // TODO(bplotka): use actual "retry()" here instead of waiting 5 minutes? + return nil } - return cleanPartialMarked(progress) - } + return errors.Wrap(err, "error executing compaction") + }) + }, func(error) { + cancel() + }) - // For backwards compatibility, use the existing single-goroutine approach - // The bucketsToCompact[0] is already assigned to insBkt above, and all setup used insBkt - // So the existing compactMainFn will work for the first/only bucket + // Periodically remove partial blocks and blocks marked for deletion + // since one iteration potentially could take a long time. + if conf.wait && conf.cleanupBlocksInterval > 0 { g.Add(func() error { - defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") - - if !conf.wait { - return compactMainFn(progressRegistry.Get(compact.Main)) - } - - // --wait=true is specified. 
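Editor's note: ApplyRetentionPolicyByResolution, kept in the rewritten flow above, marks blocks for deletion once their data is older than the retention configured for their resolution, and a zero retention for a resolution means "keep forever" (matching the `!= 0` enablement checks earlier). A self-contained model of that decision with hypothetical types, not the Thanos API:

package main

import (
	"fmt"
	"time"
)

type resolution string

const (
	resRaw resolution = "raw"
	res5m  resolution = "5m"
)

type meta struct {
	id      string
	res     resolution
	maxTime time.Time // newest sample in the block
}

// markForDeletion models the retention-by-resolution pass: zero retention keeps
// blocks forever; otherwise blocks whose newest sample is older than
// now-retention get marked for deletion.
func markForDeletion(now time.Time, metas []meta, retention map[resolution]time.Duration) []string {
	var marked []string
	for _, m := range metas {
		r := retention[m.res]
		if r == 0 {
			continue
		}
		if m.maxTime.Before(now.Add(-r)) {
			marked = append(marked, m.id)
		}
	}
	return marked
}

func main() {
	now := time.Now()
	metas := []meta{
		{id: "01A", res: resRaw, maxTime: now.Add(-60 * 24 * time.Hour)},
		{id: "01B", res: res5m, maxTime: now.Add(-60 * 24 * time.Hour)},
	}
	retention := map[resolution]time.Duration{
		resRaw: 30 * 24 * time.Hour, // raw kept 30 days
		res5m:  0,                   // 5m kept forever
	}
	fmt.Println(markForDeletion(now, metas, retention)) // [01A]
}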
- return runutil.Repeat(conf.waitInterval, ctx.Done(), func() error { - err := compactMainFn(progressRegistry.Get(compact.Main)) - if err == nil { - compactMetrics.iterations.Inc() - return nil - } - - // The HaltError type signals that we hit a critical bug and should block - // for investigation. You should alert on this being halted. - if compact.IsHaltError(err) { - if conf.haltOnError { - level.Error(logger).Log("msg", "critical error detected; halting", "err", err) - compactMetrics.halted.Set(1) - select {} - } else { - return errors.Wrap(err, "critical error detected") - } - } - - // The RetryError signals that we hit an retriable error (transient error, no connection). - // You should alert on this being triggered too frequently. - if compact.IsRetryError(err) { + return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), func() error { + err := cleanPartialMarked(progressRegistry.Get(compact.Cleanup)) + if err != nil && compact.IsRetryError(err) { level.Error(logger).Log("msg", "retriable error", "err", err) compactMetrics.retried.Inc() - // TODO(bplotka): use actual "retry()" here instead of waiting 5 minutes? return nil } - - return errors.Wrap(err, "error executing compaction") + return err }) }, func(error) { cancel() }) + } - if conf.wait { - isFirstTenant := (tenantPrefix == tenantPrefixes[0]) + // Periodically calculate the progress of compaction, downsampling and retention. + if conf.wait && conf.progressCalculateInterval > 0 { + g.Add(func() error { + ps := compact.NewCompactionProgressCalculator(reg, tsdbPlanner) + rs := compact.NewRetentionProgressCalculator(reg, retentionByResolution) + var ds *compact.DownsampleProgressCalculator + if !conf.disableDownsampling { + ds = compact.NewDownsampleProgressCalculator(reg) + } - // Only set up web UI and progress for the first tenant - if isFirstTenant && !conf.disableWeb { - r := route.New() + return runutil.Repeat(conf.progressCalculateInterval, ctx.Done(), func() error { + progress := progressRegistry.Get(compact.Calculate) + defer progress.Idle() + progress.Set(compact.SyncMeta) + if err := sy.SyncMetas(ctx); err != nil { + if compact.IsRetryError(err) { + level.Error(logger).Log("msg", "retriable error", "err", err) + compactMetrics.retried.Inc() + return nil + } + return errors.Wrapf(err, "could not sync metas") + } - ins := extpromhttp.NewInstrumentationMiddleware(baseReg, nil) + metas := sy.Metas() + progress.Set(compact.Grouping) + groups, err := grouper.Groups(metas) + if err != nil { + return errors.Wrapf(err, "could not group metadata for compaction") + } + progress.Set(compact.CalculateProgress) + if err = ps.ProgressCalculate(ctx, groups); err != nil { + return errors.Wrapf(err, "could not calculate compaction progress") + } - global := ui.NewBucketUI(logger, conf.webConf.externalPrefix, conf.webConf.prefixHeaderName, component) - global.Register(r, ins) + progress.Set(compact.Grouping) + retGroups, err := grouper.Groups(metas) + if err != nil { + return errors.Wrapf(err, "could not group metadata for retention") + } - // Configure Request Logging for HTTP calls. - opts := []logging.Option{logging.WithDecider(func(_ string, _ error) logging.Decision { - return logging.NoLogCall - })} - logMiddleware := logging.NewHTTPServerMiddleware(logger, opts...) 
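Editor's note: the wait loop above distinguishes two error classes: halt errors (critical bugs; with --debug.halt-on-error the process blocks forever so an operator can investigate and alert on the halted metric) and retry errors (transient; counted and retried on the next interval). A stripped-down sketch of that control flow, with hypothetical classifiers in place of compact.IsHaltError / compact.IsRetryError:

package main

import (
	"errors"
	"fmt"
	"time"
)

var (
	errHalt  = errors.New("halt")  // stands in for a compact.HaltError
	errRetry = errors.New("retry") // stands in for a compact.RetryError
)

func runIteration(i int) error {
	switch i {
	case 2:
		return fmt.Errorf("transient object storage blip: %w", errRetry)
	case 4:
		return fmt.Errorf("overlapping blocks detected: %w", errHalt)
	default:
		return nil
	}
}

func main() {
	haltOnError := false
	for i := 0; i < 6; i++ {
		err := runIteration(i)
		switch {
		case err == nil:
			fmt.Println("iteration", i, "ok") // iterations counter would be incremented here
		case errors.Is(err, errHalt):
			if haltOnError {
				fmt.Println("halting for investigation:", err)
				select {} // block forever; alert on the halted gauge being set
			}
			fmt.Println("critical error, exiting:", err)
			return
		case errors.Is(err, errRetry):
			fmt.Println("retriable error, will try again next interval:", err) // retried counter
		default:
			fmt.Println("unexpected error, exiting:", err)
			return
		}
		time.Sleep(10 * time.Millisecond) // stands in for conf.waitInterval
	}
}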
- api.Register(r.WithPrefix("/api/v1"), tracer, logger, ins, logMiddleware) + progress.Set(compact.CalculateProgress) + if err = rs.ProgressCalculate(ctx, retGroups); err != nil { + return errors.Wrapf(err, "could not calculate retention progress") + } - // Separate fetcher for global view. - // TODO(bwplotka): Allow Bucket UI to visualize the state of the block as well. - f := baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", baseReg), nil, "component", "globalBucketUI") - f.UpdateOnChange(func(blocks []metadata.Meta, err error) { - api.SetGlobal(blocks, err) - }) + if !conf.disableDownsampling { + progress.Set(compact.Grouping) + groups, err = grouper.Groups(metas) + if err != nil { + return errors.Wrapf(err, "could not group metadata into downsample groups") + } + progress.Set(compact.CalculateProgress) + if err := ds.ProgressCalculate(ctx, groups); err != nil { + return errors.Wrapf(err, "could not calculate downsampling progress") + } + } - srv.Handle("/", r) - - g.Add(func() error { - iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout) - _, _, _ = f.Fetch(iterCtx) - iterCancel() - - // For /global state make sure to fetch periodically. - return runutil.Repeat(conf.blockViewerSyncBlockInterval, ctx.Done(), func() error { - return runutil.RetryWithLog(logger, time.Minute, ctx.Done(), func() error { - progress := progressRegistry.Get(compact.Web) - defer progress.Idle() - iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout) - defer iterCancel() - progress.Set(compact.SyncMeta) - _, _, err := f.Fetch(iterCtx) - return err - }) - }) - }, func(error) { - cancel() - }) - } + return nil + }) + }, func(error) { + cancel() + }) + } - // Periodically remove partial blocks and blocks marked for deletion - // since one iteration potentially could take a long time. - if conf.cleanupBlocksInterval > 0 { - g.Add(func() error { - return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), func() error { - err := cleanPartialMarked(progressRegistry.Get(compact.Cleanup)) - if err != nil && compact.IsRetryError(err) { - // The RetryError signals that we hit an retriable error (transient error, no connection). - // You should alert on this being triggered too frequently. - level.Error(logger).Log("msg", "retriable error", "err", err) - compactMetrics.retried.Inc() - - return nil - } + return nil +} - return err - }) - }, func(error) { - cancel() - }) - } +func postProcess( + g *run.Group, + ctx context.Context, + logger log.Logger, + tracer opentracing.Tracer, + srv *httpserver.Server, + conf compactConfig, + reg *prometheus.Registry, + component component.Component, + progressRegistry *compact.ProgressRegistry, + compactMetrics *compactMetrics, + baseMetaFetcher *block.BaseFetcher, + globalAPI *blocksAPI.BlocksAPI, + statusProber prober.Probe, + cancel context.CancelFunc, +) error { + if conf.wait { + if !conf.disableWeb { + r := route.New() + + ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) + + global := ui.NewBucketUI(logger, conf.webConf.externalPrefix, conf.webConf.prefixHeaderName, component) + global.Register(r, ins) + + // Configure Request Logging for HTTP calls. + opts := []logging.Option{logging.WithDecider(func(_ string, _ error) logging.Decision { + return logging.NoLogCall + })} + logMiddleware := logging.NewHTTPServerMiddleware(logger, opts...) + globalAPI.Register(r.WithPrefix("/api/v1"), tracer, logger, ins, logMiddleware) + + // Separate fetcher for global view. 
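Editor's note: the progress-calculator blocks being removed here (and re-created per tenant) follow one pattern: register gauges once, then recompute them on every progressCalculateInterval tick from the freshly synced and grouped metas. A minimal model of that register-once / recompute-often shape; the gauge name and helper are illustrative, not the Thanos calculators:

package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// progressCalculator models the compaction/retention/downsample progress
// calculators: a gauge registered at startup, updated on every tick.
type progressCalculator struct {
	remaining prometheus.Gauge
}

func newProgressCalculator(reg prometheus.Registerer) *progressCalculator {
	g := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "example_todo_compactions", // illustrative name
		Help: "Compactions still planned for the current view of the bucket.",
	})
	reg.MustRegister(g)
	return &progressCalculator{remaining: g}
}

func (p *progressCalculator) calculate(plannedGroups int) {
	p.remaining.Set(float64(plannedGroups))
}

func main() {
	pc := newProgressCalculator(prometheus.NewRegistry())
	for i, planned := range []int{7, 3, 0} { // pretend each tick re-groups the synced metas
		pc.calculate(planned)
		fmt.Println("tick", i, "planned compactions:", planned)
		time.Sleep(10 * time.Millisecond)
	}
}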
+ // TODO(bwplotka): Allow Bucket UI to visualize the state of the block as well. + f := baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", reg), nil, "component", "globalBucketUI") + f.UpdateOnChange(func(blocks []metadata.Meta, err error) { + globalAPI.SetGlobal(blocks, err) + }) - // Periodically calculate the progress of compaction, downsampling and retention. - if isFirstTenant && conf.progressCalculateInterval > 0 { - g.Add(func() error { - ps := compact.NewCompactionProgressCalculator(baseReg, tsdbPlanner) - rs := compact.NewRetentionProgressCalculator(baseReg, retentionByResolution) - var ds *compact.DownsampleProgressCalculator - if !conf.disableDownsampling { - ds = compact.NewDownsampleProgressCalculator(baseReg) - } + srv.Handle("/", r) + + g.Add(func() error { + iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout) + _, _, _ = f.Fetch(iterCtx) + iterCancel() - return runutil.Repeat(conf.progressCalculateInterval, ctx.Done(), func() error { - progress := progressRegistry.Get(compact.Calculate) + // For /global state make sure to fetch periodically. + return runutil.Repeat(conf.blockViewerSyncBlockInterval, ctx.Done(), func() error { + return runutil.RetryWithLog(logger, time.Minute, ctx.Done(), func() error { + progress := progressRegistry.Get(compact.Web) defer progress.Idle() + iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout) + defer iterCancel() progress.Set(compact.SyncMeta) - if err := sy.SyncMetas(ctx); err != nil { - // The RetryError signals that we hit an retriable error (transient error, no connection). - // You should alert on this being triggered too frequently. - if compact.IsRetryError(err) { - level.Error(logger).Log("msg", "retriable error", "err", err) - compactMetrics.retried.Inc() - - return nil - } - - return errors.Wrapf(err, "could not sync metas") - } - - metas := sy.Metas() - progress.Set(compact.Grouping) - groups, err := grouper.Groups(metas) - if err != nil { - return errors.Wrapf(err, "could not group metadata for compaction") - } - progress.Set(compact.CalculateProgress) - if err = ps.ProgressCalculate(ctx, groups); err != nil { - return errors.Wrapf(err, "could not calculate compaction progress") - } - - progress.Set(compact.Grouping) - retGroups, err := grouper.Groups(metas) - if err != nil { - return errors.Wrapf(err, "could not group metadata for retention") - } - - progress.Set(compact.CalculateProgress) - if err = rs.ProgressCalculate(ctx, retGroups); err != nil { - return errors.Wrapf(err, "could not calculate retention progress") - } - - if !conf.disableDownsampling { - progress.Set(compact.Grouping) - groups, err = grouper.Groups(metas) - if err != nil { - return errors.Wrapf(err, "could not group metadata into downsample groups") - } - progress.Set(compact.CalculateProgress) - if err := ds.ProgressCalculate(ctx, groups); err != nil { - return errors.Wrapf(err, "could not calculate downsampling progress") - } - } - - return nil + _, _, err := f.Fetch(iterCtx) + return err }) - }, func(err error) { - cancel() }) - } + }, func(error) { + cancel() + }) } - } // End of tenant prefix loop - - level.Info(logger).Log("msg", "starting compact node") - statusProber.Ready() + // Note: Cleanup interval and progress calculation are now handled per-tenant + // in runCompactForTenant() since they require per-tenant components (syncer, grouper, planner). 
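Editor's note: the global-view fetcher above bounds every sync with its own context.WithTimeout (blockViewerSyncBlockTimeout) so one slow bucket listing cannot wedge the refresh loop. The same shape using only the standard library, with runutil.Repeat replaced by a plain ticker and a hypothetical fetch stand-in:

package main

import (
	"context"
	"fmt"
	"time"
)

// fetch is a hypothetical stand-in for the meta fetcher's Fetch: it must
// respect the per-iteration deadline passed in via ctx.
func fetch(ctx context.Context) error {
	select {
	case <-time.After(50 * time.Millisecond): // simulated bucket listing
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	syncTimeout := 200 * time.Millisecond // plays the role of blockViewerSyncBlockTimeout
	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			// Each iteration gets its own deadline, derived from the parent context.
			iterCtx, iterCancel := context.WithTimeout(ctx, syncTimeout)
			err := fetch(iterCtx)
			iterCancel()
			fmt.Println("refresh:", err)
		}
	}
}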
+ } return nil } From 1a7cc03d8b97b55e83ce64a1275f81a5a13acad4 Mon Sep 17 00:00:00 2001 From: William Zahary Henderson Date: Mon, 8 Dec 2025 21:01:52 +0000 Subject: [PATCH 35/35] linter fix --- cmd/thanos/compact.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index a91945b24a7..9be48fae50a 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -458,13 +458,19 @@ func runCompact( } }() - runCompactForTenant(g, ctx, tenantLogger, tracer, tenantReg, reg, component, conf, flagsMap, tenantPrefix, deleteDelay, + err = runCompactForTenant(g, ctx, tenantLogger, tenantReg, reg, conf, flagsMap, deleteDelay, compactMetrics, progressRegistry, downsampleMetrics, insBkt, relabelConfig, retentionByResolution, levels, dedupReplicaLabels, enableVerticalCompaction, mergeFunc, compactDir, downsamplingDir, cancel) + if err != nil { + return errors.Wrap(err, "failed to run compact for tenant") + } } - postProcess(g, ctx, logger, tracer, srv, conf, reg, component, progressRegistry, compactMetrics, baseMetaFetcher, globalAPI, statusProber, cancel) + err = postProcess(g, ctx, logger, tracer, srv, conf, reg, component, progressRegistry, baseMetaFetcher, globalAPI, cancel) + if err != nil { + return errors.Wrap(err, "failed to post process after compacting tenants") + } level.Info(logger).Log("msg", "starting compact node") statusProber.Ready() @@ -475,13 +481,10 @@ func runCompactForTenant( g *run.Group, ctx context.Context, logger log.Logger, - tracer opentracing.Tracer, reg prometheus.Registerer, baseReg *prometheus.Registry, - component component.Component, conf compactConfig, flagsMap map[string]string, - tenantPrefix string, deleteDelay time.Duration, compactMetrics *compactMetrics, progressRegistry *compact.ProgressRegistry, @@ -895,10 +898,8 @@ func postProcess( reg *prometheus.Registry, component component.Component, progressRegistry *compact.ProgressRegistry, - compactMetrics *compactMetrics, baseMetaFetcher *block.BaseFetcher, globalAPI *blocksAPI.BlocksAPI, - statusProber prober.Probe, cancel context.CancelFunc, ) error { if conf.wait { @@ -948,7 +949,7 @@ func postProcess( }) } - // Note: Cleanup interval and progress calculation are now handled per-tenant + // Note: Cleanup interval and progress calculation are handled per-tenant // in runCompactForTenant() since they require per-tenant components (syncer, grouper, planner). } return nil
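Editor's note: the follow-up linter-fix patch above stops discarding the return values of runCompactForTenant and postProcess and drops the parameters they no longer use. The resulting per-tenant error flow, reduced to a sketch with a hypothetical compactTenant stand-in and github.com/pkg/errors for wrapping, matching the patch's errors.Wrap style:

package main

import (
	"fmt"

	"github.com/pkg/errors"
)

// compactTenant is a hypothetical stand-in for runCompactForTenant.
func compactTenant(prefix string) error {
	if prefix == "tenant-b" {
		return errors.New("bucket listing failed")
	}
	return nil
}

func run(tenantPrefixes []string) error {
	for _, prefix := range tenantPrefixes {
		// As in the patch: stop at the first failing tenant and wrap the error
		// with context instead of silently discarding the return value.
		if err := compactTenant(prefix); err != nil {
			return errors.Wrap(err, "failed to run compact for tenant")
		}
	}
	return nil
}

func main() {
	fmt.Println(run([]string{"tenant-a", "tenant-b"}))
	// Output: failed to run compact for tenant: bucket listing failed
}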