diff --git a/CHANGELOG.md b/CHANGELOG.md index fc45057286..a5e480a0eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ **Features**: - Double write to legacy attributes for backwards compatibility. ([#5490](https://github.com/getsentry/relay/pull/5490)) +- Enables basic array support for logs, trace metrics and spans. ([#5394](https://github.com/getsentry/relay/pull/5394)) ## 25.12.0 diff --git a/relay-event-normalization/src/eap/mod.rs b/relay-event-normalization/src/eap/mod.rs index fc7c2158f2..9d6a6cc73a 100644 --- a/relay-event-normalization/src/eap/mod.rs +++ b/relay-event-normalization/src/eap/mod.rs @@ -66,6 +66,12 @@ pub fn normalize_attribute_types(attributes: &mut Annotated) { (Annotated(Some(Double), _), Annotated(Some(Value::U64(_)), _)) => (), (Annotated(Some(Double), _), Annotated(Some(Value::F64(_)), _)) => (), (Annotated(Some(String), _), Annotated(Some(Value::String(_)), _)) => (), + (Annotated(Some(Array), _), Annotated(Some(Value::Array(arr)), _)) => { + if !is_supported_array(arr) { + let _ = attribute.value_mut().take(); + attribute.meta_mut().add_error(ErrorKind::InvalidData); + } + } // Note: currently the mapping to Kafka requires that invalid or unknown combinations // of types and values are removed from the mapping. // @@ -90,6 +96,53 @@ pub fn normalize_attribute_types(attributes: &mut Annotated) { } } +/// Returns `true` if the passed array is an array we currently support. +/// +/// Currently all arrays must be homogeneous types. +fn is_supported_array(arr: &[Annotated]) -> bool { + let mut iter = arr.iter(); + + let Some(first) = iter.next() else { + // Empty arrays are supported. + return true; + }; + + let item = iter.try_fold(first, |prev, current| { + let r = match (prev.value(), current.value()) { + (None, None) => prev, + (None, Some(_)) => current, + (Some(_), None) => prev, + (Some(Value::String(_)), Some(Value::String(_))) => prev, + (Some(Value::Bool(_)), Some(Value::Bool(_))) => prev, + ( + // We allow mixing different numeric types because they are all the same in JSON. + Some(Value::I64(_) | Value::U64(_) | Value::F64(_)), + Some(Value::I64(_) | Value::U64(_) | Value::F64(_)), + ) => prev, + // Everything else is unsupported. + // + // This includes nested arrays, nested objects and mixed arrays for now. + (Some(_), Some(_)) => return None, + }; + + Some(r) + }); + + let Some(item) = item else { + // Unsupported combination of types. + return false; + }; + + matches!( + item.value(), + // `None` -> `[null, null]` is allowed, as the `Annotated` may carry information. + // `Some` -> must be a currently supported type. + None | Some( + Value::String(_) | Value::Bool(_) | Value::I64(_) | Value::U64(_) | Value::F64(_) + ) + ) +} + /// Adds the `received` time to the attributes. pub fn normalize_received(attributes: &mut Annotated, received: DateTime) { attributes @@ -626,13 +679,37 @@ mod tests { }, "missing_value": { "type": "string" + }, + "supported_array_string": { + "type": "array", + "value": ["foo", "bar"] + }, + "supported_array_double": { + "type": "array", + "value": [3, 3.0, 3] + }, + "supported_array_null": { + "type": "array", + "value": [null, null] + }, + "unsupported_array_mixed": { + "type": "array", + "value": ["foo", 1.0] + }, + "unsupported_array_object": { + "type": "array", + "value": [{}] + }, + "unsupported_array_in_array": { + "type": "array", + "value": [[]] } }"#; let mut attributes = Annotated::::from_json(json).unwrap(); normalize_attribute_types(&mut attributes); - insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r###" + insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r#" { "double_with_i64": { "type": "double", @@ -641,7 +718,32 @@ mod tests { "invalid_int_from_invalid_string": null, "missing_type": null, "missing_value": null, + "supported_array_double": { + "type": "array", + "value": [ + 3, + 3.0, + 3 + ] + }, + "supported_array_null": { + "type": "array", + "value": [ + null, + null + ] + }, + "supported_array_string": { + "type": "array", + "value": [ + "foo", + "bar" + ] + }, "unknown_type": null, + "unsupported_array_in_array": null, + "unsupported_array_mixed": null, + "unsupported_array_object": null, "valid_bool": { "type": "boolean", "value": true @@ -717,6 +819,27 @@ mod tests { } } }, + "unsupported_array_in_array": { + "": { + "err": [ + "invalid_data" + ] + } + }, + "unsupported_array_mixed": { + "": { + "err": [ + "invalid_data" + ] + } + }, + "unsupported_array_object": { + "": { + "err": [ + "invalid_data" + ] + } + }, "valid_int_from_string": { "": { "err": [ @@ -730,7 +853,7 @@ mod tests { } } } - "###); + "#); } #[test] diff --git a/relay-event-schema/src/protocol/attributes.rs b/relay-event-schema/src/protocol/attributes.rs index a277f315b9..6e98bf09fc 100644 --- a/relay-event-schema/src/protocol/attributes.rs +++ b/relay-event-schema/src/protocol/attributes.rs @@ -120,10 +120,29 @@ pub fn attribute_pii_from_conventions(state: &ProcessingState) -> Pii { #[derive(Debug, Clone, PartialEq, Eq)] pub enum AttributeType { + /// A boolean. + /// + /// The respective value must be of type [`Value::Bool`]. Boolean, + /// An integer type. + /// + /// The respective value must be of type [`Value::I64`] or [`Value::U64`]. Integer, + /// A floating point/double type. + /// + /// The respective value must be of type [`Value::F64`]. Double, + /// A string type. + /// + /// The respective value must be of type [`Value::String`]. String, + /// A string type. + /// + /// The respective value must be of type [`Value::Array`]. + Array, + /// An unknown type. + /// + /// Kept for forward compatibility. Unknown(String), } @@ -136,6 +155,7 @@ impl AttributeType { Self::Integer => "integer", Self::Double => "double", Self::String => "string", + Self::Array => "array", Self::Unknown(value) => value, } } @@ -158,6 +178,7 @@ impl From for AttributeType { "integer" => Self::Integer, "double" => Self::Double, "string" => Self::String, + "array" => Self::Array, _ => Self::Unknown(value), } } diff --git a/relay-otel/src/lib.rs b/relay-otel/src/lib.rs index 5411fd6558..66330462f8 100644 --- a/relay-otel/src/lib.rs +++ b/relay-otel/src/lib.rs @@ -37,31 +37,26 @@ pub fn otel_value_to_attribute(otel_value: OtelValue) -> Option { (AttributeType::String, Value::String(s)) } OtelValue::ArrayValue(array) => { - // Filter out nested arrays and key-value lists for safety. - // This is not usually allowed by the OTLP protocol, but we filter - // these values out before serializing for robustness. - let safe_values: Vec = array + let values: Vec> = array .values .into_iter() - .filter_map(|v| match v.value? { - OtelValue::StringValue(s) => Some(serde_json::Value::String(s)), - OtelValue::BoolValue(b) => Some(serde_json::Value::Bool(b)), - OtelValue::IntValue(i) => { - Some(serde_json::Value::Number(serde_json::Number::from(i))) - } - OtelValue::DoubleValue(d) => { - serde_json::Number::from_f64(d).map(serde_json::Value::Number) - } - OtelValue::BytesValue(bytes) => { - String::from_utf8(bytes).ok().map(serde_json::Value::String) - } - // Skip nested complex types for safety - OtelValue::ArrayValue(_) | OtelValue::KvlistValue(_) => None, + .filter_map(|v| { + Some(match v.value? { + OtelValue::StringValue(s) => Value::String(s), + OtelValue::BoolValue(b) => Value::Bool(b), + OtelValue::IntValue(i) => Value::I64(i), + OtelValue::DoubleValue(d) => Value::F64(d), + OtelValue::BytesValue(bytes) => { + Value::String(String::from_utf8(bytes).ok()?) + } + // Currently not supported. + OtelValue::ArrayValue(_) | OtelValue::KvlistValue(_) => return None, + }) }) + .map(Annotated::new) .collect(); - let json = serde_json::to_string(&safe_values).ok()?; - (AttributeType::String, Value::String(json)) + (AttributeType::Array, Value::Array(values)) } OtelValue::KvlistValue(kvlist) => { // Convert key-value list to JSON object and serialize as string. @@ -226,10 +221,18 @@ mod tests { let attr = otel_value_to_attribute(otel_value).unwrap(); let value = &attr.value.value; - assert_eq!( - get_value!(value!), - &Value::String("[\"item1\",42]".to_owned()) - ); + insta::assert_debug_snapshot!(value, @r#" + Array( + [ + String( + "item1", + ), + I64( + 42, + ), + ], + ) + "#); } #[test] diff --git a/relay-ourlogs/src/otel_to_sentry.rs b/relay-ourlogs/src/otel_to_sentry.rs index 1dfa543144..bff0f3035d 100644 --- a/relay-ourlogs/src/otel_to_sentry.rs +++ b/relay-ourlogs/src/otel_to_sentry.rs @@ -224,8 +224,11 @@ mod tests { "body": "Example log record", "attributes": { "array.attribute": { - "type": "string", - "value": "[\"many\",\"values\"]" + "type": "array", + "value": [ + "many", + "values" + ] }, "boolean.attribute": { "type": "boolean", diff --git a/relay-server/src/processing/logs/store.rs b/relay-server/src/processing/logs/store.rs index d6f68ebf7a..4393dc5751 100644 --- a/relay-server/src/processing/logs/store.rs +++ b/relay-server/src/processing/logs/store.rs @@ -2,15 +2,15 @@ use std::collections::HashMap; use chrono::{DateTime, Utc}; use relay_event_schema::protocol::{Attributes, OurLog, OurLogLevel, SpanId}; -use relay_protocol::{Annotated, IntoValue, Value}; +use relay_protocol::Annotated; use relay_quotas::Scoping; use sentry_protos::snuba::v1::{AnyValue, TraceItem, TraceItemType, any_value}; use uuid::Uuid; use crate::envelope::WithHeader; use crate::processing::logs::{Error, Result}; -use crate::processing::utils::store::{AttributeMeta, extract_meta_attributes, proto_timestamp}; -use crate::processing::{Counted, Retention}; +use crate::processing::utils::store::{extract_meta_attributes, proto_timestamp}; +use crate::processing::{self, Counted, Retention}; use crate::services::outcome::DiscardReason; use crate::services::store::StoreTraceItem; @@ -114,42 +114,7 @@ fn attributes( // +N, one for each field attribute added and some extra for potential meta. result.reserve(attributes.0.len() + 5 + 3); - for (name, attribute) in attributes { - let meta = AttributeMeta { - meta: IntoValue::extract_meta_tree(&attribute), - }; - if let Some(meta) = meta.to_any_value() { - result.insert(format!("sentry._meta.fields.attributes.{name}"), meta); - } - - let value = attribute - .into_value() - .and_then(|v| v.value.value.into_value()); - - let Some(value) = value else { - // Meta has already been handled, no value -> skip. - // There are also no current plans to handle `null` in EAP. - continue; - }; - - let Some(value) = (match value { - Value::Bool(v) => Some(any_value::Value::BoolValue(v)), - Value::I64(v) => Some(any_value::Value::IntValue(v)), - Value::U64(v) => i64::try_from(v).ok().map(any_value::Value::IntValue), - Value::F64(v) => Some(any_value::Value::DoubleValue(v)), - Value::String(v) => Some(any_value::Value::StringValue(v)), - // These cases do not happen, as they are not valid attributes - // and they should have been filtered out before already. - Value::Array(_) | Value::Object(_) => { - debug_assert!(false, "unsupported log value"); - None - } - }) else { - continue; - }; - - result.insert(name, AnyValue { value: Some(value) }); - } + processing::utils::store::convert_attributes_into(&mut result, attributes); let FieldAttributes { level, diff --git a/relay-server/src/processing/trace_metrics/store.rs b/relay-server/src/processing/trace_metrics/store.rs index 1f6631d300..ed4211ec6c 100644 --- a/relay-server/src/processing/trace_metrics/store.rs +++ b/relay-server/src/processing/trace_metrics/store.rs @@ -4,17 +4,15 @@ use chrono::{DateTime, Utc}; use prost_types::Timestamp; use relay_base_schema::metrics::MetricUnit; use relay_event_schema::protocol::{Attributes, MetricType, SpanId, TraceMetric}; -use relay_protocol::{Annotated, IntoValue, Value}; +use relay_protocol::{Annotated, Value}; use relay_quotas::Scoping; use sentry_protos::snuba::v1::{AnyValue, TraceItem, TraceItemType, any_value}; use uuid::Uuid; use crate::envelope::WithHeader; use crate::processing::trace_metrics::{Error, Result}; -use crate::processing::utils::store::{ - AttributeMeta, extract_client_sample_rate, extract_meta_attributes, -}; -use crate::processing::{Counted, Retention}; +use crate::processing::utils::store::{extract_client_sample_rate, extract_meta_attributes}; +use crate::processing::{self, Counted, Retention}; use crate::services::outcome::DiscardReason; use crate::services::store::StoreTraceItem; @@ -114,40 +112,10 @@ fn attributes( fields: FieldAttributes, ) -> HashMap { let mut result = meta; - result.reserve(attributes.0.len() + 5); + // +N, one for each field attribute added and some extra for potential meta. + result.reserve(attributes.0.len() + 15); - for (name, attribute) in attributes { - let meta = AttributeMeta { - meta: IntoValue::extract_meta_tree(&attribute), - }; - if let Some(meta) = meta.to_any_value() { - result.insert(format!("sentry._meta.fields.attributes.{name}"), meta); - } - - let value = attribute - .into_value() - .and_then(|v| v.value.value.into_value()); - - let Some(value) = value else { - continue; - }; - - let Some(value) = (match value { - Value::Bool(v) => Some(any_value::Value::BoolValue(v)), - Value::I64(v) => Some(any_value::Value::IntValue(v)), - Value::U64(v) => i64::try_from(v).ok().map(any_value::Value::IntValue), - Value::F64(v) => Some(any_value::Value::DoubleValue(v)), - Value::String(v) => Some(any_value::Value::StringValue(v)), - Value::Array(_) | Value::Object(_) => { - debug_assert!(false, "unsupported trace metric value"); - None - } - }) else { - continue; - }; - - result.insert(name, AnyValue { value: Some(value) }); - } + processing::utils::store::convert_attributes_into(&mut result, attributes); let FieldAttributes { metric_name, diff --git a/relay-server/src/processing/utils/store.rs b/relay-server/src/processing/utils/store.rs index 15da16ad26..1010da9835 100644 --- a/relay-server/src/processing/utils/store.rs +++ b/relay-server/src/processing/utils/store.rs @@ -3,9 +3,9 @@ use std::collections::HashMap; use chrono::Utc; use relay_conventions::CLIENT_SAMPLE_RATE; use relay_event_schema::protocol::Attributes; -use relay_protocol::{Annotated, IntoValue, MetaTree}; +use relay_protocol::{Annotated, IntoValue, MetaTree, Value}; -use sentry_protos::snuba::v1::{AnyValue, any_value}; +use sentry_protos::snuba::v1::{AnyValue, ArrayValue, any_value}; use serde::Serialize; /// Represents metadata extracted from Relay's annotated model. @@ -123,6 +123,73 @@ fn size_of_meta_tree(meta: &MetaTree) -> usize { size } +/// Converts [`Attributes`] into EAP compatible values. +pub fn convert_attributes_into(result: &mut HashMap, attributes: Attributes) { + for (name, attribute) in attributes { + let meta = AttributeMeta { + meta: IntoValue::extract_meta_tree(&attribute), + }; + if let Some(meta) = meta.to_any_value() { + result.insert(format!("sentry._meta.fields.attributes.{name}"), meta); + } + + let value = attribute + .into_value() + .and_then(|v| v.value.value.into_value()); + + let Some(value) = value else { + // Meta has already been handled, no value -> skip. + // There are also no current plans to handle `null` in EAP. + continue; + }; + + // Assertions for invalid types should never happen as Relay filters and validates + // attributes beforehand already. + let Some(value) = (match value { + Value::Bool(v) => Some(any_value::Value::BoolValue(v)), + Value::I64(v) => Some(any_value::Value::IntValue(v)), + Value::U64(v) => i64::try_from(v).ok().map(any_value::Value::IntValue), + Value::F64(v) => Some(any_value::Value::DoubleValue(v)), + Value::String(v) => Some(any_value::Value::StringValue(v)), + Value::Array(v) => Some(any_value::Value::ArrayValue(ArrayValue { + values: v + .into_iter() + .filter_map(|v| { + let Some(v) = v.into_value() else { + return Some(AnyValue { value: None }); + }; + + let v = match v { + Value::Bool(v) => any_value::Value::BoolValue(v), + Value::I64(v) => any_value::Value::IntValue(v), + Value::U64(v) => any_value::Value::IntValue(v as i64), + Value::F64(v) => any_value::Value::DoubleValue(v), + Value::String(v) => any_value::Value::StringValue(v), + Value::Array(_) | Value::Object(_) => { + debug_assert!( + false, + "arrays and objects nested in arrays is not yet supported" + ); + return None; + } + }; + + Some(AnyValue { value: Some(v) }) + }) + .collect(), + })), + Value::Object(_) => { + debug_assert!(false, "objects are not yet supported"); + None + } + }) else { + continue; + }; + + result.insert(name, AnyValue { value: Some(value) }); + } +} + /// Converts a [`chrono::DateTime`] into a [`prost_types::Timestamp`] pub fn proto_timestamp(dt: chrono::DateTime) -> prost_types::Timestamp { prost_types::Timestamp { diff --git a/relay-spans/src/otel_to_sentry.rs b/relay-spans/src/otel_to_sentry.rs index d1f88b45b1..5ef20ff260 100644 --- a/relay-spans/src/otel_to_sentry.rs +++ b/relay-spans/src/otel_to_sentry.rs @@ -481,8 +481,14 @@ mod tests { "description": "cmd.run", "data": { "sentry.name": "cmd.run", - "process.args": "[\"node\",\"--require\",\"preflight.cjs\"]", - "process.info": "[41]", + "process.args": [ + "node", + "--require", + "preflight.cjs" + ], + "process.info": [ + 41 + ], "sentry.origin": "auto.otlp.spans" }, "links": [] @@ -643,7 +649,7 @@ mod tests { "sentry.segment.name": "my 1st transaction", "sentry.sdk.name": "sentry.php", "sentry.name": "myname", - "sentry.metrics_summary.some_metric": "[]", + "sentry.metrics_summary.some_metric": [], "sentry.origin": "auto.otlp.spans", "sentry.status.message": "foo" }, diff --git a/relay-spans/src/otel_to_sentry_v2.rs b/relay-spans/src/otel_to_sentry_v2.rs index 3bce19f64f..7fac51b863 100644 --- a/relay-spans/src/otel_to_sentry_v2.rs +++ b/relay-spans/src/otel_to_sentry_v2.rs @@ -784,8 +784,8 @@ mod tests { "value": "prod" }, "sentry.metrics_summary.some_metric": { - "type": "string", - "value": "[]" + "type": "array", + "value": [] }, "sentry.op": { "type": "string", diff --git a/tests/integration/test_otlp_logs.py b/tests/integration/test_otlp_logs.py index 59829c43e8..c5c176ea56 100644 --- a/tests/integration/test_otlp_logs.py +++ b/tests/integration/test_otlp_logs.py @@ -116,7 +116,11 @@ def test_otlp_logs_conversion( assert items == [ { "attributes": { - "array.attribute": {"stringValue": '["first","second"]'}, + "array.attribute": { + "arrayValue": { + "values": [{"stringValue": "first"}, {"stringValue": "second"}] + } + }, "boolean.attribute": {"boolValue": True}, "double.attribute": {"doubleValue": 637.704}, "instrumentation.name": {"stringValue": "test-library"}, @@ -130,7 +134,7 @@ def test_otlp_logs_conversion( "stringValue": time_within(ts, expect_resolution="ns") }, "sentry.origin": {"stringValue": "auto.otlp.logs"}, - "sentry.payload_size_bytes": {"intValue": "385"}, + "sentry.payload_size_bytes": {"intValue": "378"}, "sentry.severity_text": {"stringValue": "info"}, "sentry.span_id": {"stringValue": "eee19b7ec3c1b174"}, "sentry.timestamp_precise": { @@ -173,7 +177,7 @@ def test_otlp_logs_conversion( "org_id": 1, "outcome": 0, "project_id": 42, - "quantity": 385, + "quantity": 378, }, ] diff --git a/tests/integration/test_ourlogs.py b/tests/integration/test_ourlogs.py index a90d808b35..6aee6d5b3e 100644 --- a/tests/integration/test_ourlogs.py +++ b/tests/integration/test_ourlogs.py @@ -128,13 +128,13 @@ def test_ourlog_multiple_containers_not_allowed( # 296 here is a billing relevant metric, do not arbitrarily change it, # this value is supposed to be static and purely based on data received, # independent of any normalization. - (None, 296), + (None, 377), # Same applies as above, a proxy Relay does not need to run normalization. - ("proxy", 296), + ("proxy", 377), # If an external Relay/Client makes modifications, sizes can change, # this is fuzzy due to slight changes in sizes due to added timestamps # and may need to be adjusted when changing normalization. - ("managed", 521), + ("managed", 587), ], ) def test_ourlog_extraction_with_sentry_logs( @@ -194,11 +194,18 @@ def test_ourlog_extraction_with_sentry_logs( "double.attribute": {"value": 1.23, "type": "double"}, "string.attribute": {"value": "some string", "type": "string"}, "pii": {"value": "4242 4242 4242 4242", "type": "string"}, + "pii_array": { + "value": ["foo", "4242424242424242", "bar"], + "type": "array", + }, "sentry.severity_text": {"value": "info", "type": "string"}, "http.response_content_length": {"value": 17, "type": "integer"}, "unknown_type": {"value": "info", "type": "unknown"}, "broken_type": {"value": "info", "type": "not_a_real_type"}, "mismatched_type": {"value": "some string", "type": "boolean"}, + "string_array": {"value": ["foo", "bar"], "type": "array"}, + "mixed_array": {"value": ["foo", 3.0], "type": "array"}, + "null_array": {"value": [None, None], "type": "array"}, "valid_string_with_other": { "value": "test", "type": "string", @@ -240,6 +247,9 @@ def test_ourlog_extraction_with_sentry_logs( "sentry._meta.fields.attributes.broken_type": { "stringValue": '{"meta":{"":{"err":["invalid_data"],"val":{"type":"not_a_real_type","value":"info"}}}}' }, + "sentry._meta.fields.attributes.mixed_array": { + "stringValue": '{"meta":{"":{"err":["invalid_data"]}}}' + }, "sentry._meta.fields.attributes.mismatched_type": { "stringValue": '{"meta":{"":{"err":["invalid_data"],"val":{"type":"boolean","value":"some ' 'string"}}}}' @@ -250,10 +260,23 @@ def test_ourlog_extraction_with_sentry_logs( "boolean.attribute": {"boolValue": True}, "double.attribute": {"doubleValue": 1.23}, "integer.attribute": {"intValue": "42"}, + "null_array": {"arrayValue": {"values": [{}, {}]}}, "pii": {"stringValue": "[creditcard]"}, "sentry._meta.fields.attributes.pii": { "stringValue": '{"meta":{"value":{"":{"rem":[["@creditcard","s",0,12]],"len":19}}}}' }, + "pii_array": { + "arrayValue": { + "values": [ + {"stringValue": "foo"}, + {"stringValue": "[creditcard]"}, + {"stringValue": "bar"}, + ] + } + }, + "sentry._meta.fields.attributes.pii_array": { + "stringValue": '{"meta":{"value":{"1":{"":{"rem":[["@creditcard","s",0,12]],"len":16}}}}}' + }, "sentry.body": {"stringValue": "Example log record"}, "sentry.browser.name": {"stringValue": "Python Requests"}, "sentry.browser.version": {"stringValue": "2.32"}, @@ -263,6 +286,11 @@ def test_ourlog_extraction_with_sentry_logs( "http.response.body.size": {"intValue": "17"}, "sentry.span_id": {"stringValue": "eee19b7ec3c1b174"}, "string.attribute": {"stringValue": "some string"}, + "string_array": { + "arrayValue": { + "values": [{"stringValue": "foo"}, {"stringValue": "bar"}] + } + }, "valid_string_with_other": {"stringValue": "test"}, **timestamps(ts), }, @@ -488,10 +516,6 @@ def test_ourlog_extraction_default_pii_scrubbing_does_not_scrub_default_attribut items_consumer = items_consumer() project_id = 42 project_config = mini_sentry.add_full_project_config(project_id) - project_config["config"]["retentions"] = { - "log": {"standard": 30, "downsampled": 13 * 30}, - } - project_config["config"]["features"] = [ "organizations:ourlogs-ingestion", ] @@ -554,8 +578,8 @@ def test_ourlog_extraction_default_pii_scrubbing_does_not_scrub_default_attribut "organizationId": "1", "projectId": "42", "received": time_within_delta(), - "retentionDays": 30, - "downsampledRetentionDays": 390, + "retentionDays": 90, + "downsampledRetentionDays": 90, "serverSampleRate": 1.0, "timestamp": time_within_delta( ts, delta=timedelta(seconds=1), expect_resolution="ns" @@ -572,21 +596,10 @@ def test_ourlog_extraction_with_sentry_logs_with_missing_fields( items_consumer = items_consumer() project_id = 42 project_config = mini_sentry.add_full_project_config(project_id) - project_config["config"]["retentions"] = { - "log": {"standard": 30, "downsampled": 13 * 30}, - } - project_config["config"]["features"] = [ "organizations:ourlogs-ingestion", ] - project_config["config"].setdefault( - "datascrubbingSettings", - { - "scrubData": True, - "scrubDefaults": True, - "scrubIpAddresses": True, - }, - ) + relay = relay_with_processing(options=TEST_CONFIG) ts = datetime.now(timezone.utc) @@ -616,8 +629,8 @@ def test_ourlog_extraction_with_sentry_logs_with_missing_fields( "organizationId": "1", "projectId": "42", "received": time_within_delta(), - "retentionDays": 30, - "downsampledRetentionDays": 390, + "retentionDays": 90, + "downsampledRetentionDays": 90, "serverSampleRate": 1.0, "timestamp": time_within_delta( ts, delta=timedelta(seconds=1), expect_resolution="ns" diff --git a/tests/integration/test_spansv2.py b/tests/integration/test_spansv2.py index 687ecd82c9..760a8b3da3 100644 --- a/tests/integration/test_spansv2.py +++ b/tests/integration/test_spansv2.py @@ -72,6 +72,7 @@ def test_spansv2_basic( "status": "ok", "attributes": { "foo": {"value": "bar", "type": "string"}, + "array": {"value": ["foo", "bar"], "type": "array"}, "invalid": {"value": True, "type": "string"}, "http.response_content_length": {"value": 17, "type": "integer"}, }, @@ -91,6 +92,7 @@ def test_spansv2_basic( "trace_id": "5b8efff798038103d269b633813fc60c", "span_id": "eee19b7ec3c1b175", "attributes": { + "array": {"type": "array", "value": ["foo", "bar"]}, "foo": {"type": "string", "value": "bar"}, "http.response_content_length": {"value": 17, "type": "integer"}, "http.response.body.size": {"value": 17, "type": "integer"}, @@ -983,6 +985,90 @@ def test_spanv2_default_pii_scrubbing_attributes( assert rem_info[0][0] == rule_type +def test_spanv2_meta_pii_scrubbing_complex_attribute(mini_sentry, relay): + """ + Tests PII scrubbing works as expected for arrays and in the future objects. + """ + project_id = 42 + project_config = mini_sentry.add_full_project_config(project_id) + project_config["config"]["features"] = [ + "organizations:standalone-span-ingestion", + "projects:span-v2-experimental-processing", + ] + project_config["config"]["datascrubbingSettings"] = { + "scrubData": True, + "scrubDefaults": True, + } + + relay = relay(mini_sentry, options=TEST_CONFIG) + ts = datetime.now(timezone.utc) + + envelope = envelope_with_spans( + { + "start_timestamp": ts.timestamp(), + "end_timestamp": ts.timestamp() + 0.5, + "trace_id": "5b8efff798038103d269b633813fc60c", + "span_id": "eee19b7ec3c1b174", + "name": "Test span", + "status": "ok", + "is_segment": False, + "attributes": { + "pii_array": { + "value": ["normal", "4242424242424242", "other"], + "type": "array", + }, + }, + }, + trace_info={ + "trace_id": "5b8efff798038103d269b633813fc60c", + "public_key": project_config["publicKeys"][0]["publicKey"], + }, + ) + + relay.send_envelope(project_id, envelope) + + envelope = mini_sentry.get_captured_envelope() + item_payload = json.loads(envelope.items[0].payload.bytes.decode()) + item = item_payload["items"][0] + + assert item == { + "trace_id": "5b8efff798038103d269b633813fc60c", + "span_id": "eee19b7ec3c1b174", + "attributes": { + "pii_array": { + "type": "array", + "value": ["normal", "[creditcard]", "other"], + }, + "sentry.browser.name": {"type": "string", "value": "Python Requests"}, + "sentry.browser.version": {"type": "string", "value": "2.32"}, + "sentry.observed_timestamp_nanos": { + "type": "string", + "value": time_within(ts, expect_resolution="ns"), + }, + "sentry.op": {"type": "string", "value": "default"}, + }, + "_meta": { + "attributes": { + "pii_array": { + "value": { + "1": { + "": { + "len": mock.ANY, + "rem": [["@creditcard", mock.ANY, mock.ANY, mock.ANY]], + } + } + } + } + }, + }, + "name": "Test span", + "start_timestamp": time_is(ts), + "end_timestamp": time_is(ts.timestamp() + 0.5), + "is_segment": False, + "status": "ok", + } + + def test_spansv2_attribute_normalization( mini_sentry, relay, diff --git a/tests/integration/test_trace_metrics.py b/tests/integration/test_trace_metrics.py index 4e5fae9258..1175978e65 100644 --- a/tests/integration/test_trace_metrics.py +++ b/tests/integration/test_trace_metrics.py @@ -59,6 +59,7 @@ def test_trace_metric_extraction( "attributes": { "http.method": {"value": "GET", "type": "string"}, "http.status_code": {"value": 200, "type": "integer"}, + "http.some.headers": {"value": ["foo", "bar"], "type": "array"}, "sentry.client_sample_rate": {"value": 0.25, "type": "double"}, }, } @@ -69,6 +70,11 @@ def test_trace_metric_extraction( item = items_consumer.get_item() assert item == { "attributes": { + "http.some.headers": { + "arrayValue": { + "values": [{"stringValue": "foo"}, {"stringValue": "bar"}] + } + }, "sentry.metric_name": {"stringValue": "http.request.duration"}, "sentry.metric_type": {"stringValue": "distribution"}, "sentry.metric_unit": {"stringValue": "millisecond"},