From 5bc08d6900c24bc045473600b42ff7510bba3c40 Mon Sep 17 00:00:00 2001 From: Forrest Babcock Date: Sun, 14 Dec 2025 14:50:33 -0500 Subject: [PATCH 1/2] TRT-2389: Generate test summaries --- .../types_row_generic_test_summary.go | 21 +++ .../types_row_test_summary.go | 23 +++ .../jobrunaggregatorlib/ci_data_client.go | 163 ++++++++++++++++++ .../retrying_ci_data_client.go | 20 +++ .../jobrunaggregatorlib/util.go | 36 ++-- .../jobrunhistoricaldataanalyzer/analyzer.go | 46 ++++- .../jobrunhistoricaldataanalyzer/cmd.go | 7 +- .../jobrunhistoricaldataanalyzer/util.go | 43 +++++ 8 files changed, 337 insertions(+), 22 deletions(-) create mode 100644 pkg/jobrunaggregator/jobrunaggregatorapi/types_row_generic_test_summary.go create mode 100644 pkg/jobrunaggregator/jobrunaggregatorapi/types_row_test_summary.go diff --git a/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_generic_test_summary.go b/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_generic_test_summary.go new file mode 100644 index 00000000000..13d19ba7c50 --- /dev/null +++ b/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_generic_test_summary.go @@ -0,0 +1,21 @@ +package jobrunaggregatorapi + +import ( + "cloud.google.com/go/civil" +) + +// GenericTestSummaryByPeriodRow represents aggregated test results for a specific suite and release over a time period, +// without breaking down by infrastructure dimensions (platform, topology, architecture). +// This provides a higher-level view of test reliability across all infrastructure variants. +type GenericTestSummaryByPeriodRow struct { + Release string `bigquery:"release"` + TestName string `bigquery:"test_name"` + TotalTestCount int64 `bigquery:"total_test_count"` + TotalFailureCount int64 `bigquery:"total_failure_count"` + TotalFlakeCount int64 `bigquery:"total_flake_count"` + FailureRate float64 `bigquery:"failure_rate"` + AvgDurationMs float64 `bigquery:"avg_duration_ms"` + PeriodStart civil.Date `bigquery:"period_start"` + PeriodEnd civil.Date `bigquery:"period_end"` + DaysWithData int64 `bigquery:"days_with_data"` +} diff --git a/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_test_summary.go b/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_test_summary.go new file mode 100644 index 00000000000..42c57d2ba8a --- /dev/null +++ b/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_test_summary.go @@ -0,0 +1,23 @@ +package jobrunaggregatorapi + +import ( + "cloud.google.com/go/civil" +) + +// TestSummaryByPeriodRow represents aggregated test results for a specific suite and release over a time period. +// This data structure corresponds to the suite_summary_by_period.sql query results. +type TestSummaryByPeriodRow struct { + Release string `bigquery:"release"` + Platform string `bigquery:"platform"` + Topology string `bigquery:"topology"` + Architecture string `bigquery:"architecture"` + TestName string `bigquery:"test_name"` + TotalTestCount int64 `bigquery:"total_test_count"` + TotalFailureCount int64 `bigquery:"total_failure_count"` + TotalFlakeCount int64 `bigquery:"total_flake_count"` + FailureRate float64 `bigquery:"failure_rate"` + AvgDurationMs float64 `bigquery:"avg_duration_ms"` + PeriodStart civil.Date `bigquery:"period_start"` + PeriodEnd civil.Date `bigquery:"period_end"` + DaysWithData int64 `bigquery:"days_with_data"` +} diff --git a/pkg/jobrunaggregator/jobrunaggregatorlib/ci_data_client.go b/pkg/jobrunaggregator/jobrunaggregatorlib/ci_data_client.go index 992bdce5b1b..2792d6660a5 100644 --- a/pkg/jobrunaggregator/jobrunaggregatorlib/ci_data_client.go +++ b/pkg/jobrunaggregator/jobrunaggregatorlib/ci_data_client.go @@ -66,6 +66,24 @@ type CIDataClient interface { // ListReleases lists all releases from the new release table ListReleases(ctx context.Context) ([]jobrunaggregatorapi.ReleaseRow, error) + + // ListTestSummaryByPeriod retrieves aggregated test results for a specific suite and release over a time period. + // Parameters: + // - suiteName: The test suite to query (e.g., 'conformance') + // - releaseName: The release version (e.g., '4.15') + // - daysBack: Number of days to look back from current date + // - minTestCount: Minimum number of test executions required to include a test in results + ListTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.TestSummaryByPeriodRow, error) + + // ListGenericTestSummaryByPeriod retrieves aggregated test results for a specific suite and release over a time period, + // aggregated across all infrastructure dimensions (platform, topology, architecture). + // This provides a higher-level view of test reliability. + // Parameters: + // - suiteName: The test suite to query (e.g., 'conformance') + // - releaseName: The release version (e.g., '4.15') + // - daysBack: Number of days to look back from current date + // - minTestCount: Minimum number of test executions required to include a test in results + ListGenericTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.GenericTestSummaryByPeriodRow, error) } type ciDataClient struct { @@ -1095,3 +1113,148 @@ func (c *ciDataClient) ListAllKnownAlerts(ctx context.Context) ([]*jobrunaggrega return allKnownAlerts, nil } + +func (c *ciDataClient) ListTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.TestSummaryByPeriodRow, error) { + // Query to summarize test results for a specific suite and release over a time period + // Groups by release, platform, topology, architecture, and test_name + // Calculates total test_count, failure_count, flake_count, failure_rate, and avg_duration_ms + // Filters results to only include tests with sufficient test runs + queryString := c.dataCoordinates.SubstituteDataSetLocation(` +SELECT + release, + platform, + topology, + architecture, + test_name, + SUM(test_count) AS total_test_count, + SUM(failure_count) AS total_failure_count, + SUM(flake_count) AS total_flake_count, + SAFE_DIVIDE(SUM(failure_count), SUM(test_count)) AS failure_rate, + AVG(avg_duration_ms) AS avg_duration_ms, + MIN(date) AS period_start, + MAX(date) AS period_end, + COUNT(DISTINCT date) AS days_with_data +FROM + DATA_SET_LOCATION.TestsSummaryByDate +WHERE + suite = @suite_name + AND release = @release_name + AND date >= DATE_SUB(CURRENT_DATE(), INTERVAL @days_back DAY) + AND date <= CURRENT_DATE() +GROUP BY + release, + platform, + topology, + architecture, + test_name +HAVING + SUM(test_count) > @min_test_count +ORDER BY + release, + platform, + topology, + architecture, + total_failure_count DESC, + test_name +`) + + query := c.client.Query(queryString) + query.Labels = map[string]string{ + bigQueryLabelKeyApp: bigQueryLabelValueApp, + bigQueryLabelKeyQuery: bigQueryLabelValueTestSummaryByPeriod, + } + query.QueryConfig.Parameters = []bigquery.QueryParameter{ + {Name: "suite_name", Value: suiteName}, + {Name: "release_name", Value: releaseName}, + {Name: "days_back", Value: daysBack}, + {Name: "min_test_count", Value: minTestCount}, + } + + rows, err := query.Read(ctx) + if err != nil { + return nil, fmt.Errorf("failed to query test summary by period with %q: %w", queryString, err) + } + + results := []jobrunaggregatorapi.TestSummaryByPeriodRow{} + for { + row := &jobrunaggregatorapi.TestSummaryByPeriodRow{} + err = rows.Next(row) + if err == iterator.Done { + break + } + if err != nil { + return nil, err + } + results = append(results, *row) + } + + return results, nil +} + +func (c *ciDataClient) ListGenericTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.GenericTestSummaryByPeriodRow, error) { + // Query to summarize test results for a specific suite and release over a time period + // Groups by release and test_name only (no infrastructure dimensions) + // Calculates total test_count, failure_count, flake_count, failure_rate, and avg_duration_ms + // Filters results to only include tests with sufficient test runs + queryString := c.dataCoordinates.SubstituteDataSetLocation(` +SELECT + release, + test_name, + SUM(test_count) AS total_test_count, + SUM(failure_count) AS total_failure_count, + SUM(flake_count) AS total_flake_count, + SAFE_DIVIDE(SUM(failure_count), SUM(test_count)) AS failure_rate, + AVG(avg_duration_ms) AS avg_duration_ms, + MIN(date) AS period_start, + MAX(date) AS period_end, + COUNT(DISTINCT date) AS days_with_data +FROM + DATA_SET_LOCATION.TestsSummaryByDate +WHERE + suite = @suite_name + AND release = @release_name + AND date >= DATE_SUB(CURRENT_DATE(), INTERVAL @days_back DAY) + AND date <= CURRENT_DATE() +GROUP BY + release, + test_name +HAVING + SUM(test_count) > @min_test_count +ORDER BY + release, + total_failure_count DESC, + test_name +`) + + query := c.client.Query(queryString) + query.Labels = map[string]string{ + bigQueryLabelKeyApp: bigQueryLabelValueApp, + bigQueryLabelKeyQuery: bigQueryLabelValueGenericTestSummaryByPeriod, + } + query.QueryConfig.Parameters = []bigquery.QueryParameter{ + {Name: "suite_name", Value: suiteName}, + {Name: "release_name", Value: releaseName}, + {Name: "days_back", Value: daysBack}, + {Name: "min_test_count", Value: minTestCount}, + } + + rows, err := query.Read(ctx) + if err != nil { + return nil, fmt.Errorf("failed to query generic test summary by period with %q: %w", queryString, err) + } + + results := []jobrunaggregatorapi.GenericTestSummaryByPeriodRow{} + for { + row := &jobrunaggregatorapi.GenericTestSummaryByPeriodRow{} + err = rows.Next(row) + if err == iterator.Done { + break + } + if err != nil { + return nil, err + } + results = append(results, *row) + } + + return results, nil +} diff --git a/pkg/jobrunaggregator/jobrunaggregatorlib/retrying_ci_data_client.go b/pkg/jobrunaggregator/jobrunaggregatorlib/retrying_ci_data_client.go index 43096a4cb08..e05e638d108 100644 --- a/pkg/jobrunaggregator/jobrunaggregatorlib/retrying_ci_data_client.go +++ b/pkg/jobrunaggregator/jobrunaggregatorlib/retrying_ci_data_client.go @@ -185,6 +185,26 @@ func (c *retryingCIDataClient) ListAllKnownAlerts(ctx context.Context) ([]*jobru return ret, err } +func (c *retryingCIDataClient) ListTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.TestSummaryByPeriodRow, error) { + var ret []jobrunaggregatorapi.TestSummaryByPeriodRow + err := retry.OnError(slowBackoff, isReadQuotaError, func() error { + var innerErr error + ret, innerErr = c.delegate.ListTestSummaryByPeriod(ctx, suiteName, releaseName, daysBack, minTestCount) + return innerErr + }) + return ret, err +} + +func (c *retryingCIDataClient) ListGenericTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.GenericTestSummaryByPeriodRow, error) { + var ret []jobrunaggregatorapi.GenericTestSummaryByPeriodRow + err := retry.OnError(slowBackoff, isReadQuotaError, func() error { + var innerErr error + ret, innerErr = c.delegate.ListGenericTestSummaryByPeriod(ctx, suiteName, releaseName, daysBack, minTestCount) + return innerErr + }) + return ret, err +} + var slowBackoff = wait.Backoff{ Steps: 4, Duration: 10 * time.Second, diff --git a/pkg/jobrunaggregator/jobrunaggregatorlib/util.go b/pkg/jobrunaggregator/jobrunaggregatorlib/util.go index 07f7343adce..ce840a0c82a 100644 --- a/pkg/jobrunaggregator/jobrunaggregatorlib/util.go +++ b/pkg/jobrunaggregator/jobrunaggregatorlib/util.go @@ -37,23 +37,25 @@ const ( ) const ( - bigQueryLabelKeyApp = "client-application" - bigQueryLabelKeyQuery = "query-details" - bigQueryLabelValueApp = "aggregator" - bigQueryLabelValueDisruptionRowCountByJob = "disruption-row-count" - bigQueryLabelValueDisruptionStats = "aggregator-disruption-stats" - bigQueryLabelValueJobRunFromName = "aggregator-job-run-from-name" - bigQueryLabelValueLastJobRunTime = "aggregator-last-job-run-time" - bigQueryLabelValueAggregatedTestRun = "aggregator-aggregated-test-run" - bigQueryLabelValueAlertHistoricalData = "aggregator-alert-historical" - bigQueryLabelValueAllJobs = "aggregator-all-jobs" - bigQueryLabelValueAllJobsWithVariants = "aggregator-all-jobs-with-variants" - bigQueryLabelValueAllKnownAlerts = "aggregator-all-known-alerts" - bigQueryLabelValueDisruptionHistoricalData = "aggregator-disruption-historical" - bigQueryLabelValueJobRunsSinceTime = "aggregator-job-runs-since-time" - bigQueryLabelValueAllReleases = "aggregator-all-releases" - bigQueryLabelValueReleaseTags = "aggregator-release-tags" - bigQueryLabelValueJobRunIDsSinceTime = "aggregator-job-run-ids-since-time" + bigQueryLabelKeyApp = "client-application" + bigQueryLabelKeyQuery = "query-details" + bigQueryLabelValueApp = "aggregator" + bigQueryLabelValueDisruptionRowCountByJob = "disruption-row-count" + bigQueryLabelValueDisruptionStats = "aggregator-disruption-stats" + bigQueryLabelValueJobRunFromName = "aggregator-job-run-from-name" + bigQueryLabelValueLastJobRunTime = "aggregator-last-job-run-time" + bigQueryLabelValueAggregatedTestRun = "aggregator-aggregated-test-run" + bigQueryLabelValueAlertHistoricalData = "aggregator-alert-historical" + bigQueryLabelValueAllJobs = "aggregator-all-jobs" + bigQueryLabelValueAllJobsWithVariants = "aggregator-all-jobs-with-variants" + bigQueryLabelValueAllKnownAlerts = "aggregator-all-known-alerts" + bigQueryLabelValueDisruptionHistoricalData = "aggregator-disruption-historical" + bigQueryLabelValueJobRunsSinceTime = "aggregator-job-runs-since-time" + bigQueryLabelValueAllReleases = "aggregator-all-releases" + bigQueryLabelValueReleaseTags = "aggregator-release-tags" + bigQueryLabelValueJobRunIDsSinceTime = "aggregator-job-run-ids-since-time" + bigQueryLabelValueTestSummaryByPeriod = "aggregator-test-summary-by-period" + bigQueryLabelValueGenericTestSummaryByPeriod = "aggregator-generic-test-summary-by-period" ) var ( diff --git a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/analyzer.go b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/analyzer.go index 01db44fa24a..b4edb92f23c 100644 --- a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/analyzer.go +++ b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/analyzer.go @@ -24,8 +24,6 @@ type JobRunHistoricalDataAnalyzerOptions struct { func (o *JobRunHistoricalDataAnalyzerOptions) Run(ctx context.Context) error { - var newHistoricalData []jobrunaggregatorapi.HistoricalData - // targetRelease will either be what the caller specified on the CLI, or the most recent release. // previousRelease will be the one prior to targetRelease. var targetRelease, previousRelease string @@ -44,6 +42,14 @@ func (o *JobRunHistoricalDataAnalyzerOptions) Run(ctx context.Context) error { } fmt.Printf("Using target release: %s, previous release: %s\n", targetRelease, previousRelease) + // For tests data type, we don't do comparison - just fetch and write directly + if o.dataType == "tests" { + return o.runTestsDataType(ctx, targetRelease) + } + + // For other data types (alerts, disruptions), continue with comparison logic + var newHistoricalData []jobrunaggregatorapi.HistoricalData + currentHistoricalData, err := readHistoricalDataFile(o.currentFile, o.dataType) if err != nil { return err @@ -91,6 +97,42 @@ func (o *JobRunHistoricalDataAnalyzerOptions) Run(ctx context.Context) error { return nil } +func (o *JobRunHistoricalDataAnalyzerOptions) runTestsDataType(ctx context.Context, release string) error { + // Hardcoded parameters for test summary query + const ( + suiteName = "openshift-tests" + daysBack = 30 + minTestCount = 100 + ) + + fmt.Printf("Fetching test data for release %s, suite %s, last %d days, min %d test runs\n", + release, suiteName, daysBack, minTestCount) + + // testSummaries, err := o.ciDataClient.ListTestSummaryByPeriod(ctx, suiteName, release, daysBack, minTestCount) + testSummaries, err := o.ciDataClient.ListGenericTestSummaryByPeriod(ctx, suiteName, release, daysBack, minTestCount) + if err != nil { + return fmt.Errorf("failed to list test summary by period: %w", err) + } + + if len(testSummaries) == 0 { + return fmt.Errorf("no test data found for suite %s, release %s", suiteName, release) + } + + // Write the test summaries directly to the output file as JSON + // out, err := formatTestOutput(testSummaries) + out, err := formatGenericTestOutput(testSummaries) + if err != nil { + return fmt.Errorf("error formatting test output: %w", err) + } + + if err := os.WriteFile(o.outputFile, out, 0644); err != nil { + return fmt.Errorf("failed to write output file: %w", err) + } + + fmt.Printf("Successfully fetched %d test results and wrote to %s\n", len(testSummaries), o.outputFile) + return nil +} + func (o *JobRunHistoricalDataAnalyzerOptions) getAlertData(ctx context.Context) ([]jobrunaggregatorapi.HistoricalData, error) { var allKnownAlerts []*jobrunaggregatorapi.KnownAlertRow var newHistoricalData []*jobrunaggregatorapi.AlertHistoricalDataRow diff --git a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/cmd.go b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/cmd.go index 17934ba69c8..c6b530393c7 100644 --- a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/cmd.go +++ b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/cmd.go @@ -26,7 +26,7 @@ type JobRunHistoricalDataAnalyzerFlags struct { PreviousRelease string } -var supportedDataTypes = sets.New[string]("alerts", "disruptions") +var supportedDataTypes = sets.New[string]("alerts", "disruptions", "tests") func NewJobRunHistoricalDataAnalyzerFlags() *JobRunHistoricalDataAnalyzerFlags { return &JobRunHistoricalDataAnalyzerFlags{ @@ -60,7 +60,8 @@ func (f *JobRunHistoricalDataAnalyzerFlags) Validate() error { return fmt.Errorf("must provide supported datatype %v", sets.List(supportedDataTypes)) } - if f.CurrentFile == "" { + // For tests data type, we don't need --current since we don't do comparison + if f.DataType != "tests" && f.CurrentFile == "" { return fmt.Errorf("must provide --current [file_path] flag to compare against") } @@ -68,7 +69,7 @@ func (f *JobRunHistoricalDataAnalyzerFlags) Validate() error { return fmt.Errorf("leeway percent must be above 0") } - if f.TargetRelease != "" && f.PreviousRelease == "" { + if f.TargetRelease != "" && f.PreviousRelease == "" && f.DataType != "tests" { return fmt.Errorf("must specify --previous-release with --target-release") } diff --git a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/util.go b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/util.go index 36e21cb8e87..d5a60347185 100644 --- a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/util.go +++ b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/util.go @@ -166,3 +166,46 @@ func formatOutput(data []parsedJobData, format string) ([]byte, error) { return nil, fmt.Errorf("invalid output format (%s)", format) } } + +func formatTestOutput(data []jobrunaggregatorapi.TestSummaryByPeriodRow) ([]byte, error) { + if len(data) == 0 { + return nil, nil + } + // Sort by release, platform, topology, architecture, failure count desc, test name + sort.SliceStable(data, func(i, j int) bool { + if data[i].Release != data[j].Release { + return data[i].Release < data[j].Release + } + if data[i].Platform != data[j].Platform { + return data[i].Platform < data[j].Platform + } + if data[i].Topology != data[j].Topology { + return data[i].Topology < data[j].Topology + } + if data[i].Architecture != data[j].Architecture { + return data[i].Architecture < data[j].Architecture + } + if data[i].TotalFailureCount != data[j].TotalFailureCount { + return data[i].TotalFailureCount > data[j].TotalFailureCount + } + return data[i].TestName < data[j].TestName + }) + return json.MarshalIndent(data, "", " ") +} + +func formatGenericTestOutput(data []jobrunaggregatorapi.GenericTestSummaryByPeriodRow) ([]byte, error) { + if len(data) == 0 { + return nil, nil + } + // Sort by release, failure count desc, test name + sort.SliceStable(data, func(i, j int) bool { + if data[i].Release != data[j].Release { + return data[i].Release < data[j].Release + } + if data[i].TotalFailureCount != data[j].TotalFailureCount { + return data[i].TotalFailureCount > data[j].TotalFailureCount + } + return data[i].TestName < data[j].TestName + }) + return json.MarshalIndent(data, "", " ") +} From c5366f476ba20a701f2b4ae3367725d37e36c84f Mon Sep 17 00:00:00 2001 From: Forrest Babcock Date: Sun, 14 Dec 2025 15:00:13 -0500 Subject: [PATCH 2/2] TRT-2389: Remove variant specific impl --- .../types_row_generic_test_summary.go | 21 ----- .../types_row_test_summary.go | 3 - .../jobrunaggregatorlib/ci_data_client.go | 91 +------------------ .../retrying_ci_data_client.go | 10 -- .../jobrunaggregatorlib/util.go | 37 ++++---- .../jobrunhistoricaldataanalyzer/analyzer.go | 6 +- .../jobrunhistoricaldataanalyzer/util.go | 26 ------ 7 files changed, 22 insertions(+), 172 deletions(-) delete mode 100644 pkg/jobrunaggregator/jobrunaggregatorapi/types_row_generic_test_summary.go diff --git a/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_generic_test_summary.go b/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_generic_test_summary.go deleted file mode 100644 index 13d19ba7c50..00000000000 --- a/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_generic_test_summary.go +++ /dev/null @@ -1,21 +0,0 @@ -package jobrunaggregatorapi - -import ( - "cloud.google.com/go/civil" -) - -// GenericTestSummaryByPeriodRow represents aggregated test results for a specific suite and release over a time period, -// without breaking down by infrastructure dimensions (platform, topology, architecture). -// This provides a higher-level view of test reliability across all infrastructure variants. -type GenericTestSummaryByPeriodRow struct { - Release string `bigquery:"release"` - TestName string `bigquery:"test_name"` - TotalTestCount int64 `bigquery:"total_test_count"` - TotalFailureCount int64 `bigquery:"total_failure_count"` - TotalFlakeCount int64 `bigquery:"total_flake_count"` - FailureRate float64 `bigquery:"failure_rate"` - AvgDurationMs float64 `bigquery:"avg_duration_ms"` - PeriodStart civil.Date `bigquery:"period_start"` - PeriodEnd civil.Date `bigquery:"period_end"` - DaysWithData int64 `bigquery:"days_with_data"` -} diff --git a/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_test_summary.go b/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_test_summary.go index 42c57d2ba8a..cb569e33ada 100644 --- a/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_test_summary.go +++ b/pkg/jobrunaggregator/jobrunaggregatorapi/types_row_test_summary.go @@ -8,9 +8,6 @@ import ( // This data structure corresponds to the suite_summary_by_period.sql query results. type TestSummaryByPeriodRow struct { Release string `bigquery:"release"` - Platform string `bigquery:"platform"` - Topology string `bigquery:"topology"` - Architecture string `bigquery:"architecture"` TestName string `bigquery:"test_name"` TotalTestCount int64 `bigquery:"total_test_count"` TotalFailureCount int64 `bigquery:"total_failure_count"` diff --git a/pkg/jobrunaggregator/jobrunaggregatorlib/ci_data_client.go b/pkg/jobrunaggregator/jobrunaggregatorlib/ci_data_client.go index 2792d6660a5..625c3b64838 100644 --- a/pkg/jobrunaggregator/jobrunaggregatorlib/ci_data_client.go +++ b/pkg/jobrunaggregator/jobrunaggregatorlib/ci_data_client.go @@ -74,16 +74,6 @@ type CIDataClient interface { // - daysBack: Number of days to look back from current date // - minTestCount: Minimum number of test executions required to include a test in results ListTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.TestSummaryByPeriodRow, error) - - // ListGenericTestSummaryByPeriod retrieves aggregated test results for a specific suite and release over a time period, - // aggregated across all infrastructure dimensions (platform, topology, architecture). - // This provides a higher-level view of test reliability. - // Parameters: - // - suiteName: The test suite to query (e.g., 'conformance') - // - releaseName: The release version (e.g., '4.15') - // - daysBack: Number of days to look back from current date - // - minTestCount: Minimum number of test executions required to include a test in results - ListGenericTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.GenericTestSummaryByPeriodRow, error) } type ciDataClient struct { @@ -1116,15 +1106,12 @@ func (c *ciDataClient) ListAllKnownAlerts(ctx context.Context) ([]*jobrunaggrega func (c *ciDataClient) ListTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.TestSummaryByPeriodRow, error) { // Query to summarize test results for a specific suite and release over a time period - // Groups by release, platform, topology, architecture, and test_name + // Groups by release and test_name only (no infrastructure dimensions) // Calculates total test_count, failure_count, flake_count, failure_rate, and avg_duration_ms // Filters results to only include tests with sufficient test runs queryString := c.dataCoordinates.SubstituteDataSetLocation(` SELECT release, - platform, - topology, - architecture, test_name, SUM(test_count) AS total_test_count, SUM(failure_count) AS total_failure_count, @@ -1143,17 +1130,11 @@ WHERE AND date <= CURRENT_DATE() GROUP BY release, - platform, - topology, - architecture, test_name HAVING SUM(test_count) > @min_test_count ORDER BY release, - platform, - topology, - architecture, total_failure_count DESC, test_name `) @@ -1172,7 +1153,7 @@ ORDER BY rows, err := query.Read(ctx) if err != nil { - return nil, fmt.Errorf("failed to query test summary by period with %q: %w", queryString, err) + return nil, fmt.Errorf("failed to query generic test summary by period with %q: %w", queryString, err) } results := []jobrunaggregatorapi.TestSummaryByPeriodRow{} @@ -1190,71 +1171,3 @@ ORDER BY return results, nil } - -func (c *ciDataClient) ListGenericTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.GenericTestSummaryByPeriodRow, error) { - // Query to summarize test results for a specific suite and release over a time period - // Groups by release and test_name only (no infrastructure dimensions) - // Calculates total test_count, failure_count, flake_count, failure_rate, and avg_duration_ms - // Filters results to only include tests with sufficient test runs - queryString := c.dataCoordinates.SubstituteDataSetLocation(` -SELECT - release, - test_name, - SUM(test_count) AS total_test_count, - SUM(failure_count) AS total_failure_count, - SUM(flake_count) AS total_flake_count, - SAFE_DIVIDE(SUM(failure_count), SUM(test_count)) AS failure_rate, - AVG(avg_duration_ms) AS avg_duration_ms, - MIN(date) AS period_start, - MAX(date) AS period_end, - COUNT(DISTINCT date) AS days_with_data -FROM - DATA_SET_LOCATION.TestsSummaryByDate -WHERE - suite = @suite_name - AND release = @release_name - AND date >= DATE_SUB(CURRENT_DATE(), INTERVAL @days_back DAY) - AND date <= CURRENT_DATE() -GROUP BY - release, - test_name -HAVING - SUM(test_count) > @min_test_count -ORDER BY - release, - total_failure_count DESC, - test_name -`) - - query := c.client.Query(queryString) - query.Labels = map[string]string{ - bigQueryLabelKeyApp: bigQueryLabelValueApp, - bigQueryLabelKeyQuery: bigQueryLabelValueGenericTestSummaryByPeriod, - } - query.QueryConfig.Parameters = []bigquery.QueryParameter{ - {Name: "suite_name", Value: suiteName}, - {Name: "release_name", Value: releaseName}, - {Name: "days_back", Value: daysBack}, - {Name: "min_test_count", Value: minTestCount}, - } - - rows, err := query.Read(ctx) - if err != nil { - return nil, fmt.Errorf("failed to query generic test summary by period with %q: %w", queryString, err) - } - - results := []jobrunaggregatorapi.GenericTestSummaryByPeriodRow{} - for { - row := &jobrunaggregatorapi.GenericTestSummaryByPeriodRow{} - err = rows.Next(row) - if err == iterator.Done { - break - } - if err != nil { - return nil, err - } - results = append(results, *row) - } - - return results, nil -} diff --git a/pkg/jobrunaggregator/jobrunaggregatorlib/retrying_ci_data_client.go b/pkg/jobrunaggregator/jobrunaggregatorlib/retrying_ci_data_client.go index e05e638d108..a06339b5afc 100644 --- a/pkg/jobrunaggregator/jobrunaggregatorlib/retrying_ci_data_client.go +++ b/pkg/jobrunaggregator/jobrunaggregatorlib/retrying_ci_data_client.go @@ -195,16 +195,6 @@ func (c *retryingCIDataClient) ListTestSummaryByPeriod(ctx context.Context, suit return ret, err } -func (c *retryingCIDataClient) ListGenericTestSummaryByPeriod(ctx context.Context, suiteName, releaseName string, daysBack, minTestCount int) ([]jobrunaggregatorapi.GenericTestSummaryByPeriodRow, error) { - var ret []jobrunaggregatorapi.GenericTestSummaryByPeriodRow - err := retry.OnError(slowBackoff, isReadQuotaError, func() error { - var innerErr error - ret, innerErr = c.delegate.ListGenericTestSummaryByPeriod(ctx, suiteName, releaseName, daysBack, minTestCount) - return innerErr - }) - return ret, err -} - var slowBackoff = wait.Backoff{ Steps: 4, Duration: 10 * time.Second, diff --git a/pkg/jobrunaggregator/jobrunaggregatorlib/util.go b/pkg/jobrunaggregator/jobrunaggregatorlib/util.go index ce840a0c82a..c44f239b046 100644 --- a/pkg/jobrunaggregator/jobrunaggregatorlib/util.go +++ b/pkg/jobrunaggregator/jobrunaggregatorlib/util.go @@ -37,25 +37,24 @@ const ( ) const ( - bigQueryLabelKeyApp = "client-application" - bigQueryLabelKeyQuery = "query-details" - bigQueryLabelValueApp = "aggregator" - bigQueryLabelValueDisruptionRowCountByJob = "disruption-row-count" - bigQueryLabelValueDisruptionStats = "aggregator-disruption-stats" - bigQueryLabelValueJobRunFromName = "aggregator-job-run-from-name" - bigQueryLabelValueLastJobRunTime = "aggregator-last-job-run-time" - bigQueryLabelValueAggregatedTestRun = "aggregator-aggregated-test-run" - bigQueryLabelValueAlertHistoricalData = "aggregator-alert-historical" - bigQueryLabelValueAllJobs = "aggregator-all-jobs" - bigQueryLabelValueAllJobsWithVariants = "aggregator-all-jobs-with-variants" - bigQueryLabelValueAllKnownAlerts = "aggregator-all-known-alerts" - bigQueryLabelValueDisruptionHistoricalData = "aggregator-disruption-historical" - bigQueryLabelValueJobRunsSinceTime = "aggregator-job-runs-since-time" - bigQueryLabelValueAllReleases = "aggregator-all-releases" - bigQueryLabelValueReleaseTags = "aggregator-release-tags" - bigQueryLabelValueJobRunIDsSinceTime = "aggregator-job-run-ids-since-time" - bigQueryLabelValueTestSummaryByPeriod = "aggregator-test-summary-by-period" - bigQueryLabelValueGenericTestSummaryByPeriod = "aggregator-generic-test-summary-by-period" + bigQueryLabelKeyApp = "client-application" + bigQueryLabelKeyQuery = "query-details" + bigQueryLabelValueApp = "aggregator" + bigQueryLabelValueDisruptionRowCountByJob = "disruption-row-count" + bigQueryLabelValueDisruptionStats = "aggregator-disruption-stats" + bigQueryLabelValueJobRunFromName = "aggregator-job-run-from-name" + bigQueryLabelValueLastJobRunTime = "aggregator-last-job-run-time" + bigQueryLabelValueAggregatedTestRun = "aggregator-aggregated-test-run" + bigQueryLabelValueAlertHistoricalData = "aggregator-alert-historical" + bigQueryLabelValueAllJobs = "aggregator-all-jobs" + bigQueryLabelValueAllJobsWithVariants = "aggregator-all-jobs-with-variants" + bigQueryLabelValueAllKnownAlerts = "aggregator-all-known-alerts" + bigQueryLabelValueDisruptionHistoricalData = "aggregator-disruption-historical" + bigQueryLabelValueJobRunsSinceTime = "aggregator-job-runs-since-time" + bigQueryLabelValueAllReleases = "aggregator-all-releases" + bigQueryLabelValueReleaseTags = "aggregator-release-tags" + bigQueryLabelValueJobRunIDsSinceTime = "aggregator-job-run-ids-since-time" + bigQueryLabelValueTestSummaryByPeriod = "aggregator-test-summary-by-period" ) var ( diff --git a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/analyzer.go b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/analyzer.go index b4edb92f23c..a3238f11157 100644 --- a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/analyzer.go +++ b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/analyzer.go @@ -108,8 +108,7 @@ func (o *JobRunHistoricalDataAnalyzerOptions) runTestsDataType(ctx context.Conte fmt.Printf("Fetching test data for release %s, suite %s, last %d days, min %d test runs\n", release, suiteName, daysBack, minTestCount) - // testSummaries, err := o.ciDataClient.ListTestSummaryByPeriod(ctx, suiteName, release, daysBack, minTestCount) - testSummaries, err := o.ciDataClient.ListGenericTestSummaryByPeriod(ctx, suiteName, release, daysBack, minTestCount) + testSummaries, err := o.ciDataClient.ListTestSummaryByPeriod(ctx, suiteName, release, daysBack, minTestCount) if err != nil { return fmt.Errorf("failed to list test summary by period: %w", err) } @@ -119,8 +118,7 @@ func (o *JobRunHistoricalDataAnalyzerOptions) runTestsDataType(ctx context.Conte } // Write the test summaries directly to the output file as JSON - // out, err := formatTestOutput(testSummaries) - out, err := formatGenericTestOutput(testSummaries) + out, err := formatTestOutput(testSummaries) if err != nil { return fmt.Errorf("error formatting test output: %w", err) } diff --git a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/util.go b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/util.go index d5a60347185..6507fb38748 100644 --- a/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/util.go +++ b/pkg/jobrunaggregator/jobrunhistoricaldataanalyzer/util.go @@ -168,32 +168,6 @@ func formatOutput(data []parsedJobData, format string) ([]byte, error) { } func formatTestOutput(data []jobrunaggregatorapi.TestSummaryByPeriodRow) ([]byte, error) { - if len(data) == 0 { - return nil, nil - } - // Sort by release, platform, topology, architecture, failure count desc, test name - sort.SliceStable(data, func(i, j int) bool { - if data[i].Release != data[j].Release { - return data[i].Release < data[j].Release - } - if data[i].Platform != data[j].Platform { - return data[i].Platform < data[j].Platform - } - if data[i].Topology != data[j].Topology { - return data[i].Topology < data[j].Topology - } - if data[i].Architecture != data[j].Architecture { - return data[i].Architecture < data[j].Architecture - } - if data[i].TotalFailureCount != data[j].TotalFailureCount { - return data[i].TotalFailureCount > data[j].TotalFailureCount - } - return data[i].TestName < data[j].TestName - }) - return json.MarshalIndent(data, "", " ") -} - -func formatGenericTestOutput(data []jobrunaggregatorapi.GenericTestSummaryByPeriodRow) ([]byte, error) { if len(data) == 0 { return nil, nil }