From c879aeae6347f960fa0efea2cb73c147b6218322 Mon Sep 17 00:00:00 2001 From: Andrew Theurer Date: Thu, 19 Feb 2026 11:34:03 -0500 Subject: [PATCH 1/8] Add support for multiple values in --breakout option Enhanced the --breakout option in get-metric-data.js to support comma-separated values (e.g., --breakout hostname=a,b,c) which returns separate metrics for each specified value. This addresses issue #110. Changes: - Modified list() parser to distinguish between field separators and value lists - Updated OpenSearch query builder to use "terms" query for multiple values - Added documentation with examples and usage guidelines The implementation maintains backward compatibility and is designed to support future aggregation syntax (e.g., hostname=a+b). Co-Authored-By: Claude Sonnet 4.5 --- queries/cdmq/README.md | 23 ++++++++++++++++ queries/cdmq/cdm.js | 13 ++++++++- queries/cdmq/get-metric-data.js | 48 ++++++++++++++++++++++++++++++++- 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/queries/cdmq/README.md b/queries/cdmq/README.md index 215d442f..74de317f 100644 --- a/queries/cdmq/README.md +++ b/queries/cdmq/README.md @@ -383,6 +383,29 @@ When evaluating these breakouts, we can see that `<1>---` }, "breakouts": [] } + +#### Specifying Multiple Values for a Breakout + +In addition to filtering a breakout to a single value (e.g., `csid=1`), you can now specify multiple values for a breakout field using comma-separated values. This will return separate metrics for each specified value. + +For example, to get metrics for both worker nodes 1 and 2: + + # node ./get-metric-data.js --period 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 --source sar-net --type L2-Gbps --breakout csid=1,2,cstype=worker,type=physical + +This will return two separate metrics: one for `csid=1` and one for `csid=2`, without including metrics for any other csid values that might exist in the data. 
+ +**Important**: The comma separator has different meanings depending on context: +- Between different breakout fields: `csid,cstype` means break out by both csid AND cstype +- Within a value list: `csid=1,2` means break out by csid, but only include values 1 and 2 +- Mixed usage: `csid=1,2,cstype=worker` means break out by csid (only values 1,2) and cstype (only value worker) + +This feature is particularly useful when: +- You want to compare specific hosts or components without seeing all possible values +- You need to reduce output by focusing on a subset of values +- You want to query multiple specific values in a single command instead of running separate queries + +**Note**: Each comma-separated value in a breakout filter (e.g., `csid=1,2`) will produce separate metrics in the output, not an aggregated metric. Future enhancements may support aggregation using a different syntax (e.g., `csid=1+2`). + So far all of the metrics have been represented as a single value for a specific time period. When `--period` is used, the script finds the begin and end times for this period, which in most cases, has a duration equal to the measurement time in the benchmark itself (around 90 seconds in these examples). One can also specify `--run`, `--begin`, and `--end` instead of `--period`, should they need to focus on a different period of time. However, for benchmark metrics (such as uperf), it is important to limit the begin and end to within the actual measurement period for that sample. Conversely, tool metrics can use a begin and end spanning any time period within the run, as the tool collection tends to run continuously for any particular run. 
Whatever time period is used, one can also use `--resolution` to divide this time period into multiple data-samples, in order to generate things like line graphs: # node ./get-metric-data.js --period 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 --source sar-net --type L2-Gbps --breakout csid=1,cstype=worker,type=physical,direction=tx,dev --filter gt:0.01 --resolution 10 diff --git a/queries/cdmq/cdm.js b/queries/cdmq/cdm.js index 703b77ac..69049d95 100644 --- a/queries/cdmq/cdm.js +++ b/queries/cdmq/cdm.js @@ -2609,6 +2609,7 @@ getMetricGroupsFromBreakouts = async function (instance, sets, yearDotMonth) { q.query.bool.filter.push(JSON.parse('{"term": {"run.run-uuid": "' + set.run + '"}}')); } // If the breakout contains a match requirement (something like "host=myhost"), then we must add a term filter for it. + // Multiple values can be specified with commas: "host=a,b,c" which will match any of those values. // Eventually it would be nice to have something other than a match, like a regex: host=/^client/. var regExp = /([^\=]+)\=([^\=]+)/; set.breakout.forEach((field) => { @@ -2616,7 +2617,17 @@ getMetricGroupsFromBreakouts = async function (instance, sets, yearDotMonth) { if (matches) { field = matches[1]; value = matches[2]; - q.query.bool.filter.push(JSON.parse('{"term": {"metric_desc.names.' + field + '": "' + value + '"}}')); + // Check if the value contains multiple comma-separated values + var values = value.split(','); + if (values.length > 1) { + // Multiple values: use "terms" query (note the plural) + q.query.bool.filter.push( + JSON.parse('{"terms": {"metric_desc.names.' + field + '": ' + JSON.stringify(values) + '}}') + ); + } else { + // Single value: use "term" query (singular) + q.query.bool.filter.push(JSON.parse('{"term": {"metric_desc.names.' 
+ field + '": "' + value + '"}}')); + } } }); q.aggs = aggs; diff --git a/queries/cdmq/get-metric-data.js b/queries/cdmq/get-metric-data.js index 38373eab..0f658e90 100644 --- a/queries/cdmq/get-metric-data.js +++ b/queries/cdmq/get-metric-data.js @@ -15,7 +15,53 @@ var sprintf = require('sprintf-js').sprintf; var instances = []; // opensearch instances function list(val) { - return val.split(','); + // Parse breakout string to handle both: + // - Simple breakouts: "hostname,cpu" -> ["hostname", "cpu"] + // - Breakouts with values: "hostname=a,cpu=x" -> ["hostname=a", "cpu=x"] + // - Breakouts with multiple values: "hostname=a,b,cpu=x,y" -> ["hostname=a,b", "cpu=x,y"] + // + // The key insight: a comma separates breakout fields UNLESS we're currently + // parsing a value list (after '=' and before the next field with '=') + + var result = []; + var current = ''; + var inValueList = false; + var parts = val.split(','); + + for (var i = 0; i < parts.length; i++) { + var part = parts[i]; + var hasEqual = part.includes('='); + + if (inValueList && !hasEqual) { + // We're in a value list and this part doesn't have '=', so it's another value + current += ',' + part; + } else if (inValueList && hasEqual) { + // We were in a value list, but now we hit a new key=value pair + result.push(current); + current = part; + inValueList = true; + } else if (!inValueList && hasEqual) { + // Starting a new key=value pair + if (current !== '') { + result.push(current); + } + current = part; + inValueList = true; + } else { + // !inValueList && !hasEqual - simple breakout field without value filter + if (current !== '') { + result.push(current); + } + current = part; + inValueList = false; + } + } + + if (current !== '') { + result.push(current); + } + + return result; } function save_host(host) { From 54b29df5f601588a34e3dbd882f457f685c48d4e Mon Sep 17 00:00:00 2001 From: Andrew Theurer Date: Thu, 19 Feb 2026 12:00:06 -0500 Subject: [PATCH 2/8] Add regex pattern support for breakout 
filters Implemented regex pattern matching in breakout filters with two modes: - r/pattern/ (lowercase): Returns separate metrics for each matching value - R/pattern/ (uppercase): Returns single aggregated metric for all matches Features: - Custom delimiter support: use any character after r/R as delimiter (e.g., r/pattern/, r|pattern|, r#pattern#) - Consistent syntax with literal values (r vs R parallels , vs +) - OpenSearch regexp query integration for efficient pattern matching Examples: - --breakout hostname=r/^worker-.*/ (separate metrics per worker) - --breakout hostname=R/^client-.*/ (aggregated metric for all clients) - --breakout dev=r|/dev/sd.*| (custom delimiter for patterns with /) Implementation: - Modified getBreakoutAggregation() to exclude fields with R/pattern/ - Updated getMetricGroupsFromBreakouts() to detect and apply regexp filters - Added comprehensive documentation with examples and use cases Co-Authored-By: Claude Sonnet 4.5 --- queries/cdmq/README.md | 44 +++++++++++++++++++++++ queries/cdmq/cdm.js | 79 +++++++++++++++++++++++++++++++----------- 2 files changed, 103 insertions(+), 20 deletions(-) diff --git a/queries/cdmq/README.md b/queries/cdmq/README.md index 74de317f..84752d2e 100644 --- a/queries/cdmq/README.md +++ b/queries/cdmq/README.md @@ -406,6 +406,50 @@ This feature is particularly useful when: **Note**: Each comma-separated value in a breakout filter (e.g., `csid=1,2`) will produce separate metrics in the output, not an aggregated metric. Future enhancements may support aggregation using a different syntax (e.g., `csid=1+2`). +#### Using Regular Expressions in Breakouts + +In addition to specifying exact values or lists of values, you can use regular expressions to match multiple values dynamically. This is particularly useful when you want to match values that follow a pattern without knowing all possible values in advance. 
+ +**Syntax**: Use `r/pattern/` for separate metrics (one per match) or `R/pattern/` for aggregated metrics (all matches combined). + +- **Lowercase `r`**: Each value matching the pattern gets its own metric (similar to `hostname=a,b,c`) +- **Uppercase `R`**: All values matching the pattern are aggregated into a single metric (similar to future `hostname=a+b+c`) + +**Examples:** + +```bash +# Get separate metrics for all worker nodes matching the pattern +node ./get-metric-data.js --period --source mpstat --type Busy-CPU --breakout hostname=r/^worker-.*/ + +# Get a single aggregated metric for all client nodes +node ./get-metric-data.js --period --source sar-net --type L2-Gbps --breakout hostname=R/^client-.*/ + +# Mix regex with other filters +node ./get-metric-data.js --period --source mpstat --type Busy-CPU --breakout hostname=r/worker-[0-9]+/,cstype=physical + +# Use different delimiter if pattern contains slashes (quote the argument so the shell does not treat | as a pipe) +node ./get-metric-data.js --period --source iostat --type kB-sec --breakout 'dev=r|/dev/sd.*|' +``` + +**Custom Delimiter**: The character immediately after `r` or `R` is used as the delimiter. While `/` is conventional, you can use any character (like `|`, `#`, `@`, `~`) if your pattern contains forward slashes. + +**Regular Expression Syntax**: The patterns use OpenSearch regex syntax, which is similar to standard regex but with some differences. 
Common patterns include: +- `.*` - Match any characters (zero or more) +- `.+` - Match any characters (one or more) +- `^` - Match start of string (note: OpenSearch `regexp` queries use Lucene regex syntax, which already matches against the entire value and may not treat `^` as an anchor; test your pattern) +- `$` - Match end of string (same caveat as `^`) +- `[0-9]` - Match any digit +- `[a-z]` - Match any lowercase letter +- `(a|b)` - Match 'a' or 'b' + +**Use Cases:** +- Match all nodes of a certain type: `hostname=r/^worker-.*/` +- Match numbered resources: `cpu=r/[0-9]+/` +- Match specific patterns: `device=r/^eth[0-9]/` +- Exclude certain patterns: Use regex negative lookahead if needed (note: Lucene regex syntax may not support lookahead; verify against the OpenSearch documentation) + +**Performance Note**: Regex patterns are evaluated by OpenSearch and may be slower than exact value matches for very large datasets. Use them when the flexibility is needed. + So far all of the metrics have been represented as a single value for a specific time period. When `--period` is used, the script finds the begin and end times for this period, which in most cases, has a duration equal to the measurement time in the benchmark itself (around 90 seconds in these examples). One can also specify `--run`, `--begin`, and `--end` instead of `--period`, should they need to focus on a different period of time. However, for benchmark metrics (such as uperf), it is important to limit the begin and end to within the actual measurement period for that sample. Conversely, tool metrics can use a begin and end spanning any time period within the run, as the tool collection tends to run continuously for any particular run. 
Whatever time period is used, one can also use `--resolution` to divide this time period into multiple data-samples, in order to generate things like line graphs: # node ./get-metric-data.js --period 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 --source sar-net --type L2-Gbps --breakout csid=1,cstype=worker,type=physical,direction=tx,dev --filter gt:0.01 --resolution 10 diff --git a/queries/cdmq/cdm.js b/queries/cdmq/cdm.js index 69049d95..e8774daf 100644 --- a/queries/cdmq/cdm.js +++ b/queries/cdmq/cdm.js @@ -2425,21 +2425,37 @@ getBreakoutAggregation = function (source, type, breakout) { breakout.forEach((field) => { //if (/([^\=]+)\=([^\=]+)/.exec(field)) { var matches = regExp.exec(field); + var shouldAggregate = true; // default: include in aggregation + if (matches) { //field = $1; - field = matches[1]; + var fieldName = matches[1]; + var value = matches[2]; + + // Check if this is an aggregated regex pattern (R/pattern/) + // If uppercase R, we should NOT add this field to the aggregation + // (all matches will be combined into a single metric) + if (/^R./.test(value)) { + shouldAggregate = false; + } + + field = fieldName; + } + + // Only add to aggregation if shouldAggregate is true + if (shouldAggregate) { + agg_str += + ',"aggs": { "metric_desc.names.' + + field + + '": { "terms": ' + + '{ "show_term_doc_count_error": true, "size": ' + + bigQuerySize + + ',' + + '"field": "metric_desc.names.' + + field + + '" }'; + field_count++; } - agg_str += - ',"aggs": { "metric_desc.names.' + - field + - '": { "terms": ' + - '{ "show_term_doc_count_error": true, "size": ' + - bigQuerySize + - ',' + - '"field": "metric_desc.names.' + - field + - '" }'; - field_count++; }); while (field_count > 0) { agg_str += '}}'; @@ -2610,23 +2626,46 @@ getMetricGroupsFromBreakouts = async function (instance, sets, yearDotMonth) { } // If the breakout contains a match requirement (something like "host=myhost"), then we must add a term filter for it. 
// Multiple values can be specified with commas: "host=a,b,c" which will match any of those values. - // Eventually it would be nice to have something other than a match, like a regex: host=/^client/. + // Regex patterns can be specified with r/pattern/ (separate metrics) or R/pattern/ (aggregated metric). var regExp = /([^\=]+)\=([^\=]+)/; set.breakout.forEach((field) => { var matches = regExp.exec(field); if (matches) { field = matches[1]; value = matches[2]; - // Check if the value contains multiple comma-separated values - var values = value.split(','); - if (values.length > 1) { - // Multiple values: use "terms" query (note the plural) + + // Check if it's a regex pattern: r/pattern/ or R/pattern/ + // Group 1: r or R (lowercase = separate metrics, uppercase = aggregated) + // Group 2: delimiter character (usually /, but can be any char) + // Group 3: the actual regex pattern + // \2: backreference to ensure matching closing delimiter + var regexMatch = /^([rR])(.)(.+)\2$/.exec(value); + + if (regexMatch) { + // It's a regex pattern + var isAggregated = regexMatch[1] === 'R'; + var delimiter = regexMatch[2]; + var pattern = regexMatch[3]; + + // Add regexp filter to OpenSearch query + // Both r/pattern/ and R/pattern/ use the same filter, + // the difference is in the aggregation (handled in getBreakoutAggregation) q.query.bool.filter.push( - JSON.parse('{"terms": {"metric_desc.names.' + field + '": ' + JSON.stringify(values) + '}}') + JSON.parse('{"regexp": {"metric_desc.names.' + field + '": ' + JSON.stringify(pattern) + '}}') ); } else { - // Single value: use "term" query (singular) - q.query.bool.filter.push(JSON.parse('{"term": {"metric_desc.names.' 
+ field + '": "' + value + '"}}')); + // Not a regex pattern, handle as literal value(s) + // Check if the value contains multiple comma-separated values + var values = value.split(','); + if (values.length > 1) { + // Multiple values: use "terms" query (note the plural) + q.query.bool.filter.push( + JSON.parse('{"terms": {"metric_desc.names.' + field + '": ' + JSON.stringify(values) + '}}') + ); + } else { + // Single value: use "term" query (singular) + q.query.bool.filter.push(JSON.parse('{"term": {"metric_desc.names.' + field + '": "' + value + '"}}')); + } } } }); From 217d77e8ef7a9f20428e687f85ba3accaaf361b8 Mon Sep 17 00:00:00 2001 From: Andrew Theurer Date: Thu, 19 Feb 2026 12:07:17 -0500 Subject: [PATCH 3/8] Add helpful error message when regex filter matches nothing When a regex breakout filter (r/pattern/ or R/pattern/) doesn't match any metric values, the query previously failed with a cryptic error: "number of generated data sets (0) does not match the number of metric query sets (1)" This commit adds detection for empty result sets caused by regex filters and returns a clear, actionable error message explaining: - Which source/type was queried - Which regex filter(s) didn't match - Suggestions for troubleshooting Example error output: No metrics found matching the specified filter(s) for source=mpstat, type=Busy-CPU Regex filter hostname=r/^nonexistent-.*/ did not match any values. Please verify: 1. The regex pattern is correct 2. Metrics exist for this source/type with the specified field 3. 
The field values match the pattern Co-Authored-By: Claude Sonnet 4.5 --- queries/cdmq/cdm.js | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/queries/cdmq/cdm.js b/queries/cdmq/cdm.js index e8774daf..1a3cf272 100644 --- a/queries/cdmq/cdm.js +++ b/queries/cdmq/cdm.js @@ -3239,6 +3239,42 @@ getMetricDataSets = async function (instance, sets, yearDotMonth) { return { 'ret-code': retCode, 'ret-msg': retMsg }; } var metricGroupIdsByLabelSets = resp['metric-id-sets']; + + // Check if any regex filters resulted in zero matches + for (var idx = 0; idx < metricGroupIdsByLabelSets.length; idx++) { + if (Object.keys(metricGroupIdsByLabelSets[idx]).length === 0) { + // This set has no metric groups - check if it was due to a regex filter + var regexFilters = []; + var regExp = /([^\=]+)\=([^\=]+)/; + sets[idx].breakout.forEach((field) => { + var matches = regExp.exec(field); + if (matches) { + var fieldName = matches[1]; + var value = matches[2]; + // Check if it's a regex pattern + if (/^[rR]./.test(value)) { + regexFilters.push({ field: fieldName, pattern: value }); + } + } + }); + + if (regexFilters.length > 0) { + // Build helpful error message + retMsg = 'No metrics found matching the specified filter(s) for source=' + sets[idx].source + ', type=' + sets[idx].type; + regexFilters.forEach((rf) => { + retMsg += '\n Regex filter ' + rf.field + '=' + rf.pattern + ' did not match any values.'; + }); + retMsg += '\nPlease verify:'; + retMsg += '\n 1. The regex pattern is correct'; + retMsg += '\n 2. Metrics exist for this source/type with the specified field'; + retMsg += '\n 3. 
The field values match the pattern'; + retCode = 1; + return { 'ret-code': retCode, 'ret-msg': retMsg }; + } + // If no regex filters, continue with existing error handling + } + } + var dataSets = await getMetricDataFromIdsSets(instance, sets, metricGroupIdsByLabelSets, yearDotMonth); if (dataSets.length != sets.length) { From 297baa5c7268d9c262579d01f7afd71b1ce0b4cb Mon Sep 17 00:00:00 2001 From: Andrew Theurer Date: Thu, 19 Feb 2026 12:11:44 -0500 Subject: [PATCH 4/8] Add aggregated literal values support with + separator Implemented aggregation of multiple literal values using the plus (+) separator, completing the unified syntax design for breakout filters. Syntax: - hostname=a,b,c (comma): Returns 3 separate metrics - hostname=a+b+c (plus): Returns 1 aggregated metric combining a, b, and c Features: - Consistent with regex syntax (r vs R parallels , vs +) - Uses same OpenSearch "terms" filter for both , and + - Aggregation controlled by getBreakoutAggregation() (excludes field) - Enhanced error messages for both comma and plus separated filters Implementation: - Modified getBreakoutAggregation() to detect + and exclude from aggregation - Updated query builder to split on + and create terms filter - Extended error handling to cover literal value filters - Added comprehensive documentation with examples and feature matrix Examples: - --breakout hostname=worker-1+worker-2+worker-3 (aggregated) - --breakout hostname=worker-1,worker-2,worker-3 (separate) - --breakout cstype=worker+master (combined metric for both types) Complete Feature Matrix: | Syntax | Result | |--------|--------| | hostname=a | 1 metric for 'a' | | hostname=a,b,c | 3 separate metrics | | hostname=a+b+c | 1 aggregated metric | | hostname=r/pattern/ | N separate metrics | | hostname=R/pattern/ | 1 aggregated metric | Co-Authored-By: Claude Sonnet 4.5 --- queries/cdmq/README.md | 47 +++++++++++++++++++++++++++++++- queries/cdmq/cdm.js | 62 ++++++++++++++++++++++++++++++++---------- 2 files 
changed, 94 insertions(+), 15 deletions(-) diff --git a/queries/cdmq/README.md b/queries/cdmq/README.md index 84752d2e..18a6b8c7 100644 --- a/queries/cdmq/README.md +++ b/queries/cdmq/README.md @@ -404,7 +404,33 @@ This feature is particularly useful when: - You need to reduce output by focusing on a subset of values - You want to query multiple specific values in a single command instead of running separate queries -**Note**: Each comma-separated value in a breakout filter (e.g., `csid=1,2`) will produce separate metrics in the output, not an aggregated metric. Future enhancements may support aggregation using a different syntax (e.g., `csid=1+2`). +#### Aggregating Multiple Values into a Single Metric + +When you want to combine data from multiple values into a single aggregated metric, use the plus (`+`) separator instead of comma. This is useful when you want to see the combined throughput, utilization, or other metrics across multiple resources. + +**Syntax**: Use `field=value1+value2+value3` to aggregate values into a single metric. 
+ +**Example - Compare separate vs aggregated**: + +```bash +# Separate metrics for each worker (comma separator) +node ./get-metric-data.js --period --source mpstat --type Busy-CPU --breakout hostname=worker-1,worker-2,worker-3 +# Returns 3 metrics: one for worker-1, one for worker-2, one for worker-3 + +# Single aggregated metric for all workers (plus separator) +node ./get-metric-data.js --period --source mpstat --type Busy-CPU --breakout hostname=worker-1+worker-2+worker-3 +# Returns 1 metric: combined data from worker-1, worker-2, and worker-3 +``` + +**Use Cases:** +- Measure total throughput across multiple servers: `hostname=server-1+server-2+server-3` +- Combine CPU usage across multiple cores: `cpu=0+1+2+3` +- Aggregate network traffic from multiple interfaces: `dev=eth0+eth1` +- Compare aggregate metrics: `--breakout cstype=worker+master` shows combined metric for both types + +**Important**: The separator determines the output behavior: +- **Comma (`,`)**: Separate metrics - `hostname=a,b` returns 2 metrics +- **Plus (`+`)**: Aggregated metric - `hostname=a+b` returns 1 combined metric #### Using Regular Expressions in Breakouts @@ -450,6 +476,25 @@ node ./get-metric-data.js --period --source iostat --type kB-sec --breako **Performance Note**: Regex patterns are evaluated by OpenSearch and may be slower than exact value matches for very large datasets. Use them when the flexibility is needed. 
+#### Complete Breakout Feature Matrix + +The following table summarizes all available breakout filter syntaxes: + +| Syntax | Type | Result | Example | +|--------|------|--------|---------| +| `hostname=a` | Single literal value | 1 metric for 'a' | `--breakout hostname=worker-1` | +| `hostname=a,b,c` | Multiple literals (comma) | 3 separate metrics | `--breakout hostname=worker-1,worker-2,worker-3` | +| `hostname=a+b+c` | Multiple literals (plus) | 1 aggregated metric | `--breakout hostname=worker-1+worker-2+worker-3` | +| `hostname=r/pattern/` | Regex (lowercase r) | N separate metrics (one per match) | `--breakout hostname=r/^worker-.*/` | +| `hostname=R/pattern/` | Regex (uppercase R) | 1 aggregated metric (all matches) | `--breakout hostname=R/^worker-.*/` | +| `hostname` | No value filter | N separate metrics (all values) | `--breakout hostname` | + +**Design Philosophy**: The syntax follows a consistent pattern: +- **Lowercase/comma** = Separate metrics for each value +- **Uppercase/plus** = Aggregated metric combining all values + +This provides an intuitive, powerful interface for both literal and pattern-based metric selection and aggregation. + So far all of the metrics have been represented as a single value for a specific time period. When `--period` is used, the script finds the begin and end times for this period, which in most cases, has a duration equal to the measurement time in the benchmark itself (around 90 seconds in these examples). One can also specify `--run`, `--begin`, and `--end` instead of `--period`, should they need to focus on a different period of time. However, for benchmark metrics (such as uperf), it is important to limit the begin and end to within the actual measurement period for that sample. Conversely, tool metrics can use a begin and end spanning any time period within the run, as the tool collection tends to run continuously for any particular run. 
Whatever time period is used, one can also use `--resolution` to divide this time period into multiple data-samples, in order to generate things like line graphs: # node ./get-metric-data.js --period 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 --source sar-net --type L2-Gbps --breakout csid=1,cstype=worker,type=physical,direction=tx,dev --filter gt:0.01 --resolution 10 diff --git a/queries/cdmq/cdm.js b/queries/cdmq/cdm.js index 1a3cf272..b22fe1f6 100644 --- a/queries/cdmq/cdm.js +++ b/queries/cdmq/cdm.js @@ -2432,11 +2432,14 @@ getBreakoutAggregation = function (source, type, breakout) { var fieldName = matches[1]; var value = matches[2]; - // Check if this is an aggregated regex pattern (R/pattern/) - // If uppercase R, we should NOT add this field to the aggregation - // (all matches will be combined into a single metric) + // Check if this should be aggregated (combined into a single metric) + // Two cases where we should NOT add field to aggregation: + // 1. Aggregated regex pattern: R/pattern/ + // 2. Aggregated literal values: a+b+c if (/^R./.test(value)) { shouldAggregate = false; + } else if (value.includes('+')) { + shouldAggregate = false; } field = fieldName; @@ -2625,7 +2628,9 @@ getMetricGroupsFromBreakouts = async function (instance, sets, yearDotMonth) { q.query.bool.filter.push(JSON.parse('{"term": {"run.run-uuid": "' + set.run + '"}}')); } // If the breakout contains a match requirement (something like "host=myhost"), then we must add a term filter for it. - // Multiple values can be specified with commas: "host=a,b,c" which will match any of those values. + // Multiple values can be specified with: + // - Commas for separate metrics: "host=a,b,c" + // - Plus signs for aggregated metric: "host=a+b+c" // Regex patterns can be specified with r/pattern/ (separate metrics) or R/pattern/ (aggregated metric). 
var regExp = /([^\=]+)\=([^\=]+)/; set.breakout.forEach((field) => { @@ -2655,10 +2660,25 @@ getMetricGroupsFromBreakouts = async function (instance, sets, yearDotMonth) { ); } else { // Not a regex pattern, handle as literal value(s) - // Check if the value contains multiple comma-separated values - var values = value.split(','); + var values; + var isAggregated = false; + + // Check for aggregated values (plus-separated) + if (value.includes('+')) { + values = value.split('+'); + isAggregated = true; + } else if (value.includes(',')) { + // Multiple separate values (comma-separated) + values = value.split(','); + isAggregated = false; + } else { + // Single value + values = [value]; + } + if (values.length > 1) { - // Multiple values: use "terms" query (note the plural) + // Multiple values (either a+b or a,b): use "terms" query (note the plural) + // The aggregation behavior is controlled by getBreakoutAggregation q.query.bool.filter.push( JSON.parse('{"terms": {"metric_desc.names.' + field + '": ' + JSON.stringify(values) + '}}') ); @@ -3240,11 +3260,12 @@ getMetricDataSets = async function (instance, sets, yearDotMonth) { } var metricGroupIdsByLabelSets = resp['metric-id-sets']; - // Check if any regex filters resulted in zero matches + // Check if any filters resulted in zero matches for (var idx = 0; idx < metricGroupIdsByLabelSets.length; idx++) { if (Object.keys(metricGroupIdsByLabelSets[idx]).length === 0) { - // This set has no metric groups - check if it was due to a regex filter + // This set has no metric groups - check if it was due to filters var regexFilters = []; + var literalFilters = []; var regExp = /([^\=]+)\=([^\=]+)/; sets[idx].breakout.forEach((field) => { var matches = regExp.exec(field); @@ -3254,24 +3275,37 @@ getMetricDataSets = async function (instance, sets, yearDotMonth) { // Check if it's a regex pattern if (/^[rR]./.test(value)) { regexFilters.push({ field: fieldName, pattern: value }); + } else if (value.includes(',') || 
value.includes('+')) { + // Literal value filter (comma or plus separated) + literalFilters.push({ field: fieldName, values: value }); + } else { + // Single literal value filter + literalFilters.push({ field: fieldName, values: value }); } } }); - if (regexFilters.length > 0) { + if (regexFilters.length > 0 || literalFilters.length > 0) { // Build helpful error message - retMsg = 'No metrics found matching the specified filter(s) for source=' + sets[idx].source + ', type=' + sets[idx].type; + retMsg = + 'No metrics found matching the specified filter(s) for source=' + + sets[idx].source + + ', type=' + + sets[idx].type; regexFilters.forEach((rf) => { retMsg += '\n Regex filter ' + rf.field + '=' + rf.pattern + ' did not match any values.'; }); + literalFilters.forEach((lf) => { + retMsg += '\n Filter ' + lf.field + '=' + lf.values + ' did not match any values.'; + }); retMsg += '\nPlease verify:'; - retMsg += '\n 1. The regex pattern is correct'; + retMsg += '\n 1. The filter values/patterns are correct'; retMsg += '\n 2. Metrics exist for this source/type with the specified field'; - retMsg += '\n 3. The field values match the pattern'; + retMsg += '\n 3. The field values match your filter'; retCode = 1; return { 'ret-code': retCode, 'ret-msg': retMsg }; } - // If no regex filters, continue with existing error handling + // If no filters with values, continue with existing error handling } } From 054ae2e65bfc85a0eea8b7d4773a9a136ecf7d1d Mon Sep 17 00:00:00 2001 From: Andrew Theurer Date: Thu, 19 Feb 2026 12:20:05 -0500 Subject: [PATCH 5/8] Revert "Add aggregated literal values support with + separator" This reverts commit 297baa5c7268d9c262579d01f7afd71b1ce0b4cb. 
--- queries/cdmq/README.md | 47 +------------------------------- queries/cdmq/cdm.js | 62 ++++++++++-------------------------------- 2 files changed, 15 insertions(+), 94 deletions(-) diff --git a/queries/cdmq/README.md b/queries/cdmq/README.md index 18a6b8c7..84752d2e 100644 --- a/queries/cdmq/README.md +++ b/queries/cdmq/README.md @@ -404,33 +404,7 @@ This feature is particularly useful when: - You need to reduce output by focusing on a subset of values - You want to query multiple specific values in a single command instead of running separate queries -#### Aggregating Multiple Values into a Single Metric - -When you want to combine data from multiple values into a single aggregated metric, use the plus (`+`) separator instead of comma. This is useful when you want to see the combined throughput, utilization, or other metrics across multiple resources. - -**Syntax**: Use `field=value1+value2+value3` to aggregate values into a single metric. - -**Example - Compare separate vs aggregated**: - -```bash -# Separate metrics for each worker (comma separator) -node ./get-metric-data.js --period --source mpstat --type Busy-CPU --breakout hostname=worker-1,worker-2,worker-3 -# Returns 3 metrics: one for worker-1, one for worker-2, one for worker-3 - -# Single aggregated metric for all workers (plus separator) -node ./get-metric-data.js --period --source mpstat --type Busy-CPU --breakout hostname=worker-1+worker-2+worker-3 -# Returns 1 metric: combined data from worker-1, worker-2, and worker-3 -``` - -**Use Cases:** -- Measure total throughput across multiple servers: `hostname=server-1+server-2+server-3` -- Combine CPU usage across multiple cores: `cpu=0+1+2+3` -- Aggregate network traffic from multiple interfaces: `dev=eth0+eth1` -- Compare aggregate metrics: `--breakout cstype=worker+master` shows combined metric for both types - -**Important**: The separator determines the output behavior: -- **Comma (`,`)**: Separate metrics - `hostname=a,b` returns 2 metrics -- 
**Plus (`+`)**: Aggregated metric - `hostname=a+b` returns 1 combined metric +**Note**: Each comma-separated value in a breakout filter (e.g., `csid=1,2`) will produce separate metrics in the output, not an aggregated metric. Future enhancements may support aggregation using a different syntax (e.g., `csid=1+2`). #### Using Regular Expressions in Breakouts @@ -476,25 +450,6 @@ node ./get-metric-data.js --period --source iostat --type kB-sec --breako **Performance Note**: Regex patterns are evaluated by OpenSearch and may be slower than exact value matches for very large datasets. Use them when the flexibility is needed. -#### Complete Breakout Feature Matrix - -The following table summarizes all available breakout filter syntaxes: - -| Syntax | Type | Result | Example | -|--------|------|--------|---------| -| `hostname=a` | Single literal value | 1 metric for 'a' | `--breakout hostname=worker-1` | -| `hostname=a,b,c` | Multiple literals (comma) | 3 separate metrics | `--breakout hostname=worker-1,worker-2,worker-3` | -| `hostname=a+b+c` | Multiple literals (plus) | 1 aggregated metric | `--breakout hostname=worker-1+worker-2+worker-3` | -| `hostname=r/pattern/` | Regex (lowercase r) | N separate metrics (one per match) | `--breakout hostname=r/^worker-.*/` | -| `hostname=R/pattern/` | Regex (uppercase R) | 1 aggregated metric (all matches) | `--breakout hostname=R/^worker-.*/` | -| `hostname` | No value filter | N separate metrics (all values) | `--breakout hostname` | - -**Design Philosophy**: The syntax follows a consistent pattern: -- **Lowercase/comma** = Separate metrics for each value -- **Uppercase/plus** = Aggregated metric combining all values - -This provides an intuitive, powerful interface for both literal and pattern-based metric selection and aggregation. - So far all of the metrics have been represented as a single value for a specific time period. 
When `--period` is used, the script finds the begin and end times for this period, which in most cases, has a duration equal to the measurement time in the benchmark itself (around 90 seconds in these examples). One can also specify `--run`, `--begin`, and `--end` instead of `--period`, should they need to focus on a different period of time. However, for benchmark metrics (such as uperf), it is important to limit the begin and end to within the actual measurement period for that sample. Conversely, tool metrics can use a begin and end spanning any time period within the run, as the tool collection tends to run continuously for any particular run. Whatever time period is used, one can also use `--resolution` to divide this time period into multiple data-samples, in order to generate things like line graphs: # node ./get-metric-data.js --period 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 --source sar-net --type L2-Gbps --breakout csid=1,cstype=worker,type=physical,direction=tx,dev --filter gt:0.01 --resolution 10 diff --git a/queries/cdmq/cdm.js b/queries/cdmq/cdm.js index b22fe1f6..1a3cf272 100644 --- a/queries/cdmq/cdm.js +++ b/queries/cdmq/cdm.js @@ -2432,14 +2432,11 @@ getBreakoutAggregation = function (source, type, breakout) { var fieldName = matches[1]; var value = matches[2]; - // Check if this should be aggregated (combined into a single metric) - // Two cases where we should NOT add field to aggregation: - // 1. Aggregated regex pattern: R/pattern/ - // 2. 
Aggregated literal values: a+b+c + // Check if this is an aggregated regex pattern (R/pattern/) + // If uppercase R, we should NOT add this field to the aggregation + // (all matches will be combined into a single metric) if (/^R./.test(value)) { shouldAggregate = false; - } else if (value.includes('+')) { - shouldAggregate = false; } field = fieldName; @@ -2628,9 +2625,7 @@ getMetricGroupsFromBreakouts = async function (instance, sets, yearDotMonth) { q.query.bool.filter.push(JSON.parse('{"term": {"run.run-uuid": "' + set.run + '"}}')); } // If the breakout contains a match requirement (something like "host=myhost"), then we must add a term filter for it. - // Multiple values can be specified with: - // - Commas for separate metrics: "host=a,b,c" - // - Plus signs for aggregated metric: "host=a+b+c" + // Multiple values can be specified with commas: "host=a,b,c" which will match any of those values. // Regex patterns can be specified with r/pattern/ (separate metrics) or R/pattern/ (aggregated metric). 
var regExp = /([^\=]+)\=([^\=]+)/; set.breakout.forEach((field) => { @@ -2660,25 +2655,10 @@ getMetricGroupsFromBreakouts = async function (instance, sets, yearDotMonth) { ); } else { // Not a regex pattern, handle as literal value(s) - var values; - var isAggregated = false; - - // Check for aggregated values (plus-separated) - if (value.includes('+')) { - values = value.split('+'); - isAggregated = true; - } else if (value.includes(',')) { - // Multiple separate values (comma-separated) - values = value.split(','); - isAggregated = false; - } else { - // Single value - values = [value]; - } - + // Check if the value contains multiple comma-separated values + var values = value.split(','); if (values.length > 1) { - // Multiple values (either a+b or a,b): use "terms" query (note the plural) - // The aggregation behavior is controlled by getBreakoutAggregation + // Multiple values: use "terms" query (note the plural) q.query.bool.filter.push( JSON.parse('{"terms": {"metric_desc.names.' + field + '": ' + JSON.stringify(values) + '}}') ); @@ -3260,12 +3240,11 @@ getMetricDataSets = async function (instance, sets, yearDotMonth) { } var metricGroupIdsByLabelSets = resp['metric-id-sets']; - // Check if any filters resulted in zero matches + // Check if any regex filters resulted in zero matches for (var idx = 0; idx < metricGroupIdsByLabelSets.length; idx++) { if (Object.keys(metricGroupIdsByLabelSets[idx]).length === 0) { - // This set has no metric groups - check if it was due to filters + // This set has no metric groups - check if it was due to a regex filter var regexFilters = []; - var literalFilters = []; var regExp = /([^\=]+)\=([^\=]+)/; sets[idx].breakout.forEach((field) => { var matches = regExp.exec(field); @@ -3275,37 +3254,24 @@ getMetricDataSets = async function (instance, sets, yearDotMonth) { // Check if it's a regex pattern if (/^[rR]./.test(value)) { regexFilters.push({ field: fieldName, pattern: value }); - } else if (value.includes(',') || 
value.includes('+')) { - // Literal value filter (comma or plus separated) - literalFilters.push({ field: fieldName, values: value }); - } else { - // Single literal value filter - literalFilters.push({ field: fieldName, values: value }); } } }); - if (regexFilters.length > 0 || literalFilters.length > 0) { + if (regexFilters.length > 0) { // Build helpful error message - retMsg = - 'No metrics found matching the specified filter(s) for source=' + - sets[idx].source + - ', type=' + - sets[idx].type; + retMsg = 'No metrics found matching the specified filter(s) for source=' + sets[idx].source + ', type=' + sets[idx].type; regexFilters.forEach((rf) => { retMsg += '\n Regex filter ' + rf.field + '=' + rf.pattern + ' did not match any values.'; }); - literalFilters.forEach((lf) => { - retMsg += '\n Filter ' + lf.field + '=' + lf.values + ' did not match any values.'; - }); retMsg += '\nPlease verify:'; - retMsg += '\n 1. The filter values/patterns are correct'; + retMsg += '\n 1. The regex pattern is correct'; retMsg += '\n 2. Metrics exist for this source/type with the specified field'; - retMsg += '\n 3. The field values match your filter'; + retMsg += '\n 3. 
The field values match the pattern'; retCode = 1; return { 'ret-code': retCode, 'ret-msg': retMsg }; } - // If no filters with values, continue with existing error handling + // If no regex filters, continue with existing error handling } } From c8ac61cbe25b826860718d4e2a3148460d085446 Mon Sep 17 00:00:00 2001 From: Andrew Theurer Date: Thu, 19 Feb 2026 12:20:41 -0500 Subject: [PATCH 6/8] Update documentation to use regex alternation for aggregating specific values Instead of the reverted a+b syntax, document the use of regex alternation with uppercase R to aggregate specific literal values: - hostname=R/worker-1|worker-2|worker-3/ aggregates those 3 specific hosts - This approach works correctly with the existing regex implementation - Provides the same functionality without additional code complexity Updated examples and removed references to future a+b+c syntax. Co-Authored-By: Claude Sonnet 4.5 --- queries/cdmq/README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/queries/cdmq/README.md b/queries/cdmq/README.md index 84752d2e..28faf322 100644 --- a/queries/cdmq/README.md +++ b/queries/cdmq/README.md @@ -404,7 +404,7 @@ This feature is particularly useful when: - You need to reduce output by focusing on a subset of values - You want to query multiple specific values in a single command instead of running separate queries -**Note**: Each comma-separated value in a breakout filter (e.g., `csid=1,2`) will produce separate metrics in the output, not an aggregated metric. Future enhancements may support aggregation using a different syntax (e.g., `csid=1+2`). +**Note**: Each comma-separated value in a breakout filter (e.g., `csid=1,2`) will produce separate metrics in the output, not an aggregated metric. To aggregate specific values into a single metric, use regex alternation with uppercase `R`: `hostname=R/worker-1|worker-2|worker-3/` (see regex section below). 
#### Using Regular Expressions in Breakouts @@ -413,7 +413,7 @@ In addition to specifying exact values or lists of values, you can use regular e **Syntax**: Use `r/pattern/` for separate metrics (one per match) or `R/pattern/` for aggregated metrics (all matches combined). - **Lowercase `r`**: Each value matching the pattern gets its own metric (similar to `hostname=a,b,c`) -- **Uppercase `R`**: All values matching the pattern are aggregated into a single metric (similar to future `hostname=a+b+c`) +- **Uppercase `R`**: All values matching the pattern are aggregated into a single metric **Examples:** @@ -424,6 +424,9 @@ node ./get-metric-data.js --period --source mpstat --type Busy-CPU --brea # Get a single aggregated metric for all client nodes node ./get-metric-data.js --period --source sar-net --type L2-Gbps --breakout hostname=R/^client-.*/ +# Aggregate specific values using alternation (|) +node ./get-metric-data.js --period --source mpstat --type Busy-CPU --breakout hostname=R/worker-1|worker-2|worker-3/ + # Mix regex with other filters node ./get-metric-data.js --period --source mpstat --type Busy-CPU --breakout hostname=r/worker-[0-9]+/,cstype=physical From 0f8c0c9270a129d1b18b730374d978ec50ae6f71 Mon Sep 17 00:00:00 2001 From: Andrew Theurer Date: Thu, 19 Feb 2026 12:27:17 -0500 Subject: [PATCH 7/8] Fix R/pattern/ to correctly filter aggregated results Fixed critical bug where R/pattern/ (aggregated regex) was including ALL values instead of only those matching the pattern. Root Cause: When using R/pattern/, the field is excluded from the aggregation structure (correct - this causes aggregation). However, the regexp filter was only applied to the initial aggregation query, not when querying for metric IDs. This meant the metric ID query had no regexp filter, resulting in ALL metric IDs being included in the aggregated result. Solution: 1. Extract regexp filters for aggregated fields (R/pattern/) after aggregation 2. 
Pass these filters to mgetMetricIdsFromTerms via termsSets 3. Apply the regexp filters when building metric ID queries Example: --breakout hostname=R/worker-1|worker-2/ Before: Aggregated ALL hostnames (worker-1, worker-2, worker-3) After: Aggregates ONLY worker-1 and worker-2 (correct) Technical Details: - Modified getMetricGroupsFromBreakouts to extract and preserve R/ filters - Modified mgetMetricIdsFromTerms to apply preserved regexp filters - Filters are added to the query.bool.filter array for metric ID lookups Co-Authored-By: Claude Sonnet 4.5 --- queries/cdmq/cdm.js | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/queries/cdmq/cdm.js b/queries/cdmq/cdm.js index 1a3cf272..f20fee8b 100644 --- a/queries/cdmq/cdm.js +++ b/queries/cdmq/cdm.js @@ -2509,6 +2509,7 @@ mgetMetricIdsFromTerms = async function (instance, termsSets, yearDotMonth) { for (i = 0; i < termsSets.length; i++) { var periId = termsSets[i].period; var runId = termsSets[i].run; + var regexpFilters = termsSets[i].regexpFilters || []; Object.keys(termsSets[i].termsByLabel) .sort() .forEach((label) => { @@ -2529,6 +2530,12 @@ mgetMetricIdsFromTerms = async function (instance, termsSets, yearDotMonth) { if (runId != null) { q.query.bool.filter.push(JSON.parse('{"term": {"run.run-uuid": "' + runId + '"}}')); } + // Apply any regexp filters that were excluded from aggregation + regexpFilters.forEach((rf) => { + q.query.bool.filter.push( + JSON.parse('{"regexp": {"metric_desc.names.' 
+ rf.field + '": ' + JSON.stringify(rf.pattern) + '}}') + ); + }); jsonArr.push('{}'); jsonArr.push(JSON.stringify(q)); totalReqs++; @@ -2685,10 +2692,33 @@ getMetricGroupsFromBreakouts = async function (instance, sets, yearDotMonth) { var metricGroupTerms = getMetricGroupTermsFromAgg(responses[idx].aggregations); // Derive the label from each group and organize into a dict, key = label, value = the filter terms var metricGroupTermsByLabel = getMetricGroupTermsByLabel(metricGroupTerms); + + // Extract regexp filters that were excluded from aggregation (R/pattern/) + // These need to be preserved when querying for metric IDs + var regexpFilters = []; + var regExp = /([^\=]+)\=([^\=]+)/; + sets[idx].breakout.forEach((field) => { + var matches = regExp.exec(field); + if (matches) { + var fieldName = matches[1]; + var value = matches[2]; + var regexMatch = /^([rR])(.)(.+)\2$/.exec(value); + if (regexMatch) { + var isAggregated = regexMatch[1] === 'R'; + var pattern = regexMatch[3]; + if (isAggregated) { + // This field was excluded from aggregation, need to preserve the regexp filter + regexpFilters.push({ field: fieldName, pattern: pattern }); + } + } + } + }); + var thisLabelSet = { run: sets[idx].run, period: sets[idx].period, - termsByLabel: metricGroupTermsByLabel + termsByLabel: metricGroupTermsByLabel, + regexpFilters: regexpFilters }; termsSets.push(thisLabelSet); } From d721b0f05b31987f7f9ce2d90dd589ffc4075222 Mon Sep 17 00:00:00 2001 From: Andrew Theurer Date: Thu, 19 Feb 2026 13:17:05 -0500 Subject: [PATCH 8/8] prettier Run prettier --write on modified files to fix CI formatting checks Co-Authored-By: Claude Sonnet 4.5 --- queries/cdmq/README.md | 105 ++++++++++++++++++++++++----------------- queries/cdmq/cdm.js | 6 ++- 2 files changed, 67 insertions(+), 44 deletions(-) diff --git a/queries/cdmq/README.md b/queries/cdmq/README.md index 28faf322..9c5d034c 100644 --- a/queries/cdmq/README.md +++ b/queries/cdmq/README.md @@ -1,26 +1,32 @@ # cdmq ## Introduction 
-The contents of this directory contain a collection of scripts in Javascript intended to be executed with [node.js](https://nodejs.org). These scripts get data from an OpenSearch instance. The data must be in Common Data Format. which is documented in this project under [templates](../templates). The scripts here are meant to help inspect, compare, and export data from benchmarks and performance & resource-utilization tools, in order to report and investigate performance. -In order to generate this data, you must run a benchmark via automation framework which uses the Common Data Format and index that data into OpenSearch. One of those automation frameworks is the [crucible](https://github.com/perftool-incubator/crucible) project. A subproject of crucible, [crucible-examples](https://github.com/perftool-incubator/crucible-examples), includes scenarios to run some of these benchmarks. +The contents of this directory contain a collection of scripts in Javascript intended to be executed with [node.js](https://nodejs.org). These scripts get data from an OpenSearch instance. The data must be in Common Data Format, which is documented in this project under [templates](../templates). The scripts here are meant to help inspect, compare, and export data from benchmarks and performance & resource-utilization tools, in order to report and investigate performance. + +In order to generate this data, you must run a benchmark via an automation framework which uses the Common Data Format and index that data into OpenSearch. One of those automation frameworks is the [crucible](https://github.com/perftool-incubator/crucible) project. A subproject of crucible, [crucible-examples](https://github.com/perftool-incubator/crucible-examples), includes scenarios to run some of these benchmarks.
## Terms -Many of the scripts refer to different terms we associate with either running a benchmark or examining the resulting data, and these terms are not always universally known or agreed upon for specific benchmarks (like uperf and fio), or even benchmark automation frameworks, Nevertheless, the CommonDataModel project has adopted the following terms, which originate from the crucible project. You will need to become familiar with these terms in order to use these scripts: + +Many of the scripts refer to different terms we associate with either running a benchmark or examining the resulting data, and these terms are not always universally known or agreed upon for specific benchmarks (like uperf and fio), or even benchmark automation frameworks. Nevertheless, the CommonDataModel project has adopted the following terms, which originate from the crucible project. You will need to become familiar with these terms in order to use these scripts: - benchmark: A specific benchmark, like fio, uperf, trafficgen, or oslat. -- run: An invocation of a command, like `crucible run`, which facilitates the execution of a benchmark, often running the benchmark many times. In the context `cdmq`, this usually refers to the data that was generated for that run. -- iteration: A set of parameters to execute a benchmark, for example, for uperf: `test-type: stream, wsize: 256, nthreads: 16, duration: 90`. One or more unique iterations typically make up a run -- parameter: An option used for the underlying benchmark. Most parameters are unique to a specific benchmark. -- sample: An actual execution of an iteration. Often there will be multiple samples for an iteration, in order to provide an average and standard-deviation. -- period: A time-period for a sample. When a sample is executed, there may be one or more periods which represent a certain phase for the benchmark, like warmup, measurement, etc.
+- run: An invocation of a command, like `crucible run`, which facilitates the execution of a benchmark, often running the benchmark many times. In the context of `cdmq`, this usually refers to the data that was generated for that run. +- iteration: A set of parameters to execute a benchmark, for example, for uperf: `test-type: stream, wsize: 256, nthreads: 16, duration: 90`. One or more unique iterations typically make up a run. +- parameter: An option used for the underlying benchmark. Most parameters are unique to a specific benchmark. +- sample: An actual execution of an iteration. Often there will be multiple samples for an iteration, in order to provide an average and standard-deviation. +- period: A time-period for a sample. When a sample is executed, there may be one or more periods which represent a certain phase for the benchmark, like warmup, measurement, etc. - primary-period: A period where a benchmark's primary metric is measured -- primary-metric: A benchmark's most common metric, like `Gbps`, or `IOPS`. Each iteration has a primary-metric, but different iterations (different combinations of parameters) might have a different primary-metric. For example uperf samples with `test-type: stream` have a primary metric of `Gbps`, while `test-type: rr` uses `transactions-sec` +- primary-metric: A benchmark's most common metric, like `Gbps`, or `IOPS`. Each iteration has a primary-metric, but different iterations (different combinations of parameters) might have a different primary-metric. For example, uperf samples with `test-type: stream` have a primary metric of `Gbps`, while `test-type: rr` uses `transactions-sec`. - metric: Some unit of measure, either a measure of throughput (work/time) or a "count" (elapsed-time, latency, level, occupancy, etc), or a simple "pass/fail" + ## Scripts -Below are documented most common scripts used for this project.
All of these scripts can be run via `node ./script-name.js`, and some have wrapper scripts `script-name.sh` which provide the casual user a more convenient invocation. If you are using [crucible](https://github.com/perftool-incubator/crucible), it may provide an alternative way to use this script (documented in each script subsection below). + +The most common scripts used for this project are documented below. All of these scripts can be run via `node ./script-name.js`, and some have wrapper scripts `script-name.sh` which provide the casual user a more convenient invocation. If you are using [crucible](https://github.com/perftool-incubator/crucible), it may provide an alternative way to use this script (documented in each script subsection below). + ### get-result-summary.js -This script produces a summary of a single run., including tags, metrics present, as well as all the iterations and their samples. To run this script, you must specify a run-id: `node ./get-result-summary.js --run 0bda53c3-f0b2-416a-be54-cee738b75010`. If you are using the crucible project, you will likely be using the crucible command-line `crucible get result --run 0bda53c3-f0b2-416a-be54-cee738b75010`. In this example, the following output is produced: + +This script produces a summary of a single run, including tags, metrics present, as well as all the iterations and their samples. To run this script, you must specify a run-id: `node ./get-result-summary.js --run 0bda53c3-f0b2-416a-be54-cee738b75010`. If you are using the crucible project, you will likely be using the crucible command-line `crucible get result --run 0bda53c3-f0b2-416a-be54-cee738b75010`.
In this example, the following output is produced: run-id: 0bda53c3-f0b2-416a-be54-cee738b75010 tags: datapath=ovn-k-tc irq=bal kernel=4.18.0-305.34.2.el8_4.x86_64 mtu=1400 offload=False osruntime=chroot pods-per-worker=16 proto=tcp rcos=410.84.202202110840-0 scale_out_factor=1 sdn=OVNKubernetes test=stream topo=internode userenv=stream8 @@ -101,9 +107,12 @@ This script produces a summary of a single run., including tags, metrics present primary period-id: 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 period range: begin: 1648111546729 end: 1648111635267 result: (Gbps) samples: 6.37 6.26 6.30 mean: 6.31 min: 6.26 max: 6.37 stddev: 0.05 stddevpct: 0.87 -When investigating performance, users often start with a get-result-summary, and then drill-down to a specific instance to view various metrics. Note that all timestamps are millisecond epoch-time. + +When investigating performance, users often start with a get-result-summary, and then drill-down to a specific instance to view various metrics. Note that all timestamps are millisecond epoch-time. + ### get-metric-result.js -This script is used to dig deeper into the metrics (tool or benchmark data) found in a run or period. To find out which metrics are available for a run, look at the `metrics:` section from a get-result-summary.js output: + +This script is used to dig deeper into the metrics (tool or benchmark data) found in a run or period. To find out which metrics are available for a run, look at the `metrics:` section from a get-result-summary.js output: metrics: source: procstat @@ -125,11 +134,11 @@ This script is used to dig deeper into the metrics (tool or benchmark data) foun source: sar-tasks types: Context-switches-sec Processes-created-sec -This script requires either a `--period` option or a combination of `--run`, `--begin`, and `--end`, plus a `--source` and `--type`. 
In the following example, a query for uperf for Gbps is used: +This script requires either a `--period` option or a combination of `--run`, `--begin`, and `--end`, plus a `--source` and `--type`. In the following example, a query for uperf for Gbps is used: # node ./get-metric-data.js --period 52FB1F1E-AD33-11EC-B16C-ADE96E3275F7 --source uperf --type Gbps This produces a JSON output for this metric: - + { "name": "uperf", "type": "Gbps", @@ -175,9 +184,9 @@ The same query can be used for tool data, such as sar: ] } -Note that the value for sar-net, L2-Gbps is quite different than what is reported for Uperf, Gbps. This can be the case for many reasons, but in this case let's show how the two are actually can be similar. First, one must understand that while these both report Gbps, the information comes from different sources. One is measuring Gbps as reported by the client program in uperf, and another is total network throughput for all systems which were running sar. +Note that the value for sar-net, L2-Gbps is quite different than what is reported for Uperf, Gbps. This can be the case for many reasons, but in this case let's show how the two can actually be similar. First, one must understand that while these both report Gbps, the information comes from different sources. One is measuring Gbps as reported by the client program in uperf, and another is total network throughput for all systems which were running sar. -To help explain the difference, let's use the `breakout` function of the get-metric-data.js script. By default, the query is reporting this metric from all sources of `sar-net` and type `Gbps`. Depending on where the sar tool was used, it may be collecting this information from multiple hosts, and on those hosts from multiple network type (and specific interfaces, and a direction for each, and so on). These queries, by default, do not assume how the user wants to breakout and filter this metric.
The user can, however, choose to include any available breakout, which are found in the output: +To help explain the difference, let's use the `breakout` function of the get-metric-data.js script. By default, the query is reporting this metric from all sources of `sar-net` and type `Gbps`. Depending on where the sar tool was used, it may be collecting this information from multiple hosts, and on those hosts from multiple network type (and specific interfaces, and a direction for each, and so on). These queries, by default, do not assume how the user wants to breakout and filter this metric. The user can, however, choose to include any available breakout, which are found in the output: "breakouts": [ "csid", @@ -187,9 +196,8 @@ To help explain the difference, let's use the `breakout` function of the get-met "type" ] -A breakout will divide the metric into multiple metrics, one for each value of that breakout. For example, if metric data that was collected has a `csid` of 1 and 2, a breakout of csid will include two metrics. In the example below, a breakout for csid and cstype are used: +A breakout will divide the metric into multiple metrics, one for each value of that breakout. For example, if metric data that was collected has a `csid` of 1 and 2, a breakout of csid will include two metrics. In the example below, a breakout for csid and cstype are used: - # node ./get-metric-data.js --period 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 --source sar-net --type L2-Gbps --breakout csid,cstype { "name": "sar-net", @@ -217,13 +225,15 @@ A breakout will divide the metric into multiple metrics, one for each value of t "type" ] } -Now we can see that the metric is broken-out by `cs-type` and `cs-id`. These are terms used to describe some type of physical component in your test environment. These tests happen to be from Openshift, so the nodes where these benchmarks are run are `worker` (for cs-type) nodes with `1` and `2` (for cs-id). 
However, this breakout is not enough to get close to the uperf metric, but we also have more breakouts available: + +Now we can see that the metric is broken-out by `cs-type` and `cs-id`. These are terms used to describe some type of physical component in your test environment. These tests happen to be from Openshift, so the nodes where these benchmarks are run are `worker` (for cs-type) nodes with `1` and `2` (for cs-id). However, this breakout is not enough to get close to the uperf metric, but we also have more breakouts available: "breakouts": [ "dev", "direction", "type" ] + So, let's use `type`, which breaks out the Gbps by virtual and physical interfaces: # node ./get-metric-data.js --period 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 --source sar-net --type L2-Gbps --breakout csid,cstype,type @@ -275,7 +285,8 @@ So, let's use `type`, which breaks out the Gbps by virtual and physical interfac "direction" ] } -We are one step closer, in that multiple metrics, such as `<1>--` and `<2>--` show a Gbps value which is close to what uperf reports. Uperf, however, reports the data transfer for the client, and when using the `stream` test-type, this is the writes that the client is doing, which would be Tx out the client and Rx into the server. To make this more clear where this is happening, let's use another breakout available to sar-net. Gbps, `direction`. + +We are one step closer, in that multiple metrics, such as `<1>--` and `<2>--` show a Gbps value which is close to what uperf reports. Uperf, however, reports the data transfer for the client, and when using the `stream` test-type, this is the writes that the client is doing, which would be Tx out the client and Rx into the server. To make this more clear where this is happening, let's use another breakout available to sar-net. Gbps, `direction`. 
{ "name": "sar-net", @@ -357,10 +368,11 @@ We are one step closer, in that multiple metrics, such as `<1>-----` has 6.786 Gbps, not quite the same sas uperf, but uperf reports Gbps for the messages in the program, and not the Gbps for the additional headers for TCP, IP, and Ethernet. This still does not show the Gbps for a specific interface, but that can be done with another breakout. However, as shown above, more breakouts generally produces more output, some of which you may want to filter. This can be accomplished in two ways, by limiting the value for a breakout and limiting the metrics based on the metric-value. The following uses both of these methods: + +When evaluating these breakouts, we can see that `<1>---` has 6.786 Gbps, not quite the same as uperf, but uperf reports Gbps for the messages in the program, and not the Gbps for the additional headers for TCP, IP, and Ethernet. This still does not show the Gbps for a specific interface, but that can be done with another breakout. However, as shown above, more breakouts generally produce more output, some of which you may want to filter. This can be accomplished in two ways, by limiting the value for a breakout and limiting the metrics based on the metric-value. The following uses both of these methods: # node ./get-metric-data.js --period 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 --source sar-net --type L2-Gbps --breakout csid=1,cstype=worker,type=physical,direction,dev --filter gt:0.01 - + { "name": "sar-net", "type": "L2-Gbps", @@ -395,11 +407,13 @@ For example, to get metrics for both worker nodes 1 and 2: This will return two separate metrics: one for `csid=1` and one for `csid=2`, without including metrics for any other csid values that might exist in the data.
**Important**: The comma separator has different meanings depending on context: + - Between different breakout fields: `csid,cstype` means break out by both csid AND cstype - Within a value list: `csid=1,2` means break out by csid, but only include values 1 and 2 - Mixed usage: `csid=1,2,cstype=worker` means break out by csid (only values 1,2) and cstype (only value worker) This feature is particularly useful when: + - You want to compare specific hosts or components without seeing all possible values - You need to reduce output by focusing on a subset of values - You want to query multiple specific values in a single command instead of running separate queries @@ -437,6 +451,7 @@ node ./get-metric-data.js --period --source iostat --type kB-sec --breako **Custom Delimiter**: The character immediately after `r` or `R` is used as the delimiter. While `/` is conventional, you can use any character (like `|`, `#`, `@`, `~`) if your pattern contains forward slashes. **Regular Expression Syntax**: The patterns use OpenSearch regex syntax, which is similar to standard regex but with some differences. Common patterns include: + - `.*` - Match any characters (zero or more) - `.+` - Match any characters (one or more) - `^` - Match start of string @@ -446,6 +461,7 @@ node ./get-metric-data.js --period --source iostat --type kB-sec --breako - `(a|b)` - Match 'a' or 'b' **Use Cases:** + - Match all nodes of a certain type: `hostname=r/^worker-.*/` - Match numbered resources: `cpu=r/[0-9]+/` - Match specific patterns: `device=r/^eth[0-9]/` @@ -453,7 +469,7 @@ node ./get-metric-data.js --period --source iostat --type kB-sec --breako **Performance Note**: Regex patterns are evaluated by OpenSearch and may be slower than exact value matches for very large datasets. Use them when the flexibility is needed. -So far all of the metrics have been represented as a single value for a specific time period. 
When `--period` is used, the script finds the begin and end times for this period, which in most cases, has a duration equal to the measurement time in the benchmark itself (around 90 seconds in these examples). One can also specify `--run`, `--begin`, and `--end` instead of `--period`, should they need to focus on a different period of time. However, for benchmark metrics (such as uperf), it is important to limit the begin and end to within the actual measurement period for that sample. Conversely, tool metrics can use a begin and end spanning any time period within the run, as the tool collection tends to run continuously for any particular run. Whatever time period is used, one can also use `--resolution` to divide this time period into multiple data-samples, in order to generate things like line graphs: +So far all of the metrics have been represented as a single value for a specific time period. When `--period` is used, the script finds the begin and end times for this period, which in most cases, has a duration equal to the measurement time in the benchmark itself (around 90 seconds in these examples). One can also specify `--run`, `--begin`, and `--end` instead of `--period`, should they need to focus on a different period of time. However, for benchmark metrics (such as uperf), it is important to limit the begin and end to within the actual measurement period for that sample. Conversely, tool metrics can use a begin and end spanning any time period within the run, as the tool collection tends to run continuously for any particular run. 
Whatever time period is used, one can also use `--resolution` to divide this time period into multiple data-samples, in order to generate things like line graphs: # node ./get-metric-data.js --period 4F1014D6-AD33-11EC-94E3-ADE96E3275F7 --source sar-net --type L2-Gbps --breakout csid=1,cstype=worker,type=physical,direction=tx,dev --filter gt:0.01 --resolution 10 Checking for httpd...appears to be running @@ -518,24 +534,27 @@ So far all of the metrics have been represented as a single value for a specific }, "breakouts": [] } + ### compare-results.js -This script is used to generate comparisons across one or more runs and provides to the ability to tailor how iterations are grouped when comparing them. This script is particularly useful when you run a benchmark with different settings in your test-bed. For example, if you were to test a MTU of 1500 and then 9000, you could use this script to generate output that compares the two runs. You are, however, not limited to two runs, and you are not actually required to specify the run IDs at all. - -`compare-results.js` has two primary purposes. The first is to assemble the iterations to want to compare. This is done with options to the script: - * `--filter-by-params` - * `--filter-by-tags` - * `--filter-by-age` - * `--add-runs` - * `--add-iterations` - - When using the `--filter-by-*` options, iterations are queried from all three filters and then intersected. Users can focus on specific benchmark params and test-bed configurations, for example: - - `node ./compare-results.js --filter-by-params test-type:stream --filter-by-tags study:protocols --dont-breakout-params protocol` + +This script is used to generate comparisons across one or more runs and provides to the ability to tailor how iterations are grouped when comparing them. This script is particularly useful when you run a benchmark with different settings in your test-bed. 
For example, if you were to test an MTU of 1500 and then 9000, you could use this script to generate output that compares the two runs. You are, however, not limited to two runs, and you are not actually required to specify the run IDs at all. + +`compare-results.js` has two primary purposes. The first is to assemble the iterations you want to compare. This is done with options to the script: + +- `--filter-by-params` +- `--filter-by-tags` +- `--filter-by-age` +- `--add-runs` +- `--add-iterations` + +When using the `--filter-by-*` options, iterations are queried from all three filters and then intersected. Users can focus on specific benchmark params and test-bed configurations, for example: + +`node ./compare-results.js --filter-by-params test-type:stream --filter-by-tags study:protocols --dont-breakout-params protocol` All common tags: tuned:throughput-performance dir:forward study:protocols All common params: test-type:stream nthreads:1 duration:120 - - + + label mean stddevpct iter-id nthreads:1 wsize:256 @@ -568,16 +587,16 @@ This script is used to generate comparisons across one or more runs and provides wsize:32768 protocol:tcp 122.5500 41.2100 CFEFB0A2-B9EA-11EC-A682-01EC7B3275F7 -In the output above `--dont-breakout-params protocol` forces the `protocol` param to be pushed to the label instead of broken-out on the left. In most cases, the user will choose at least one param and/or one tag to not breakout, in order to create a "cluster" of results with labels (which can later be used to form a clustered bar chart). +In the output above `--dont-breakout-params protocol` forces the `protocol` param to be pushed to the label instead of broken-out on the left. In most cases, the user will choose at least one param and/or one tag to not breakout, in order to create a "cluster" of results with labels (which can later be used to form a clustered bar chart).
Users can control both what gets pushed to the label, as well as the order of the breakout `--breakout-order-params`: # node ./compare-results.js --filter-by-params test-type:stream --filter-by-tags study:protocols --dont-breakout-params wsize --breakout-order-params protocol,threads - + All common tags: study:protocols All common params: test-type:stream duration:120 - - + + label mean stddevpct iter-id protocol:tcp nthreads:1 @@ -606,4 +625,4 @@ Users can control both what gets pushed to the label, as well as the order of th Also, `--breakout-order-tags` and `--dont-breakout-tags` are also available with similar functions. -Note that that while not required, `--filter-by-age` has a default of `0-30`, which filters iterations between 0 to 30 *days* old. This default is used so that queries do not unnecessarily query very old run data (unless you select a different age range). +Note that while not required, `--filter-by-age` has a default of `0-30`, which filters iterations between 0 and 30 _days_ old. This default is used so that queries do not unnecessarily query very old run data (unless you select a different age range). diff --git a/queries/cdmq/cdm.js b/queries/cdmq/cdm.js index f20fee8b..bf5af706 100644 --- a/queries/cdmq/cdm.js +++ b/queries/cdmq/cdm.js @@ -3290,7 +3290,11 @@ getMetricDataSets = async function (instance, sets, yearDotMonth) { if (regexFilters.length > 0) { // Build helpful error message - retMsg = 'No metrics found matching the specified filter(s) for source=' + sets[idx].source + ', type=' + sets[idx].type; + retMsg = + 'No metrics found matching the specified filter(s) for source=' + + sets[idx].source + + ', type=' + + sets[idx].type; regexFilters.forEach((rf) => { retMsg += '\n Regex filter ' + rf.field + '=' + rf.pattern + ' did not match any values.'; });