Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ dependencies = [
# [end] jsonschema format validators
"sentry-arroyo>=2.30.0",
"sentry-forked-email-reply-parser>=0.5.12.post1",
"sentry-kafka-schemas>=2.1.11",
"sentry-kafka-schemas>=2.1.13",
"sentry-ophio>=1.1.3",
"sentry-protos>=0.4.2",
"sentry-redis-tools>=0.5.0",
Expand Down
5 changes: 1 addition & 4 deletions src/sentry/spans/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,10 +438,7 @@ def flush_segments(self, now: int) -> dict[SegmentKey, FlushedSegment]:
}

is_segment = segment_span_id == span["span_id"]
span.setdefault("attributes", {})["sentry.is_segment"] = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the renaming from sentry.is_segment to is_segment intentional? What do we do for old vs. new queries?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now a top-level field instead of being stored in attributes as sentry.is_segment.

From my understanding, nothing should change on the query side: at the end of the pipeline, the top-level field is still materialized into EAP as sentry.is_segment. See the change in process_segments/convert.py.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My idea was that we use is_segment in the buffer code, and then convert_span_to_item writes the attribute to sentry.is_segment such that all product queries still work as expected.

I realize now that we might get wrong behavior if the buffer still contains spans with the old format, but the segment consumer already expects the new format. Should I update the span consumer and the segment consumer in separate PRs, and let the span consumer double-write initially?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to be sure that in-flight data in Redis does not cause crashes in old vs. new versions of the spans consumer too. I think that if you first update only the part that inserts into Redis, and then update the flusher process and segments consumer in a second step, we should be good.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we're good then -- the changes I'm making all happen after redis, except for the additional check of is_remote, which does not change the evaluation of the condition segment_span_id == span["span_id"] here.

"type": "boolean",
"value": is_segment,
}
span["is_segment"] = is_segment
if is_segment:
has_root_span = True

Expand Down
7 changes: 6 additions & 1 deletion src/sentry/spans/consumers/process/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,12 @@ def process_batch(
project_id=val["project_id"],
payload=payload.value,
end_timestamp=cast(float, val["end_timestamp"]),
is_segment_span=bool(val.get("parent_span_id") is None or val.get("is_remote")),
# TODO(INGEST-612): Remove "is_remote" as soon as Relay writes "is_segment".
is_segment_span=bool(
val.get("parent_span_id") is None
or val.get("is_segment")
or val.get("is_remote")
),
)

spans.append(span)
Expand Down
5 changes: 5 additions & 0 deletions src/sentry/spans/consumers/process_segments/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"end_timestamp": "sentry.end_timestamp_precise",
"event_id": "sentry.event_id",
"hash": "sentry.hash",
# TODO(INGEST-612): Remove "is_remote" once Relay has stopped writing it.
"is_remote": "sentry.is_remote",
"kind": "sentry.kind",
"name": "sentry.name",
Expand Down Expand Up @@ -57,6 +58,10 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem:
except ValueError:
pass

# For `is_segment`, we trust the value written by `flush_segments` over a pre-existing attribute:
if (is_segment := span.get("is_segment")) is not None:
attributes["sentry.is_segment"] = _anyvalue(is_segment)

for field_name, attribute_name in FIELD_TO_ATTRIBUTE.items():
attribute = span.get(field_name) # type:ignore[assignment]
if attribute is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def _find_segment_span(spans: list[SpanEvent]) -> SpanEvent | None:

# Iterate backwards since we usually expect the segment span to be at the end.
for span in reversed(spans):
if attribute_value(span, "sentry.is_segment"):
if span.get("is_segment"):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flush_segments now unconditionally sets this field (see buffer.py), so it is safe to migrate away from attributes[sentry.is_segment] even if Relay does not write the top-level field yet.

return span

return None
Expand Down
1 change: 0 additions & 1 deletion src/sentry/spans/consumers/process_segments/shim.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def make_compatible(span: SpanEvent) -> CompatibleSpan:
"sentry_tags": _sentry_tags(span.get("attributes") or {}),
"op": get_span_op(span),
"exclusive_time": attribute_value(span, "sentry.exclusive_time_ms"),
"is_segment": bool(attribute_value(span, "sentry.is_segment")),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field is now set by flush_segments.

}

return ret
Expand Down
1 change: 0 additions & 1 deletion src/sentry/spans/consumers/process_segments/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ class CompatibleSpan(SpanEvent, total=True):
exclusive_time: float
op: str
sentry_tags: dict[str, str]
is_segment: bool
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now part of the official schema.


# Added by `SpanGroupingResults.write_to_spans` in `_enrich_spans`
hash: NotRequired[str]
Expand Down
2 changes: 1 addition & 1 deletion src/sentry/spans/grouping/strategy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_standalone_span_group(self, span: Span) -> str:
# compatibility with transaction events, but fall back to default
# fingerprinting if the span doesn't have a transaction.
if (
attribute_value(span, "sentry.is_segment")
span.get("is_segment")
and (transaction := attribute_value(span, "sentry.transaction")) is not None
):
result = Hash()
Expand Down
4 changes: 2 additions & 2 deletions tests/sentry/spans/consumers/process/test_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def add_commit(offsets, force=False):
"received": 1699999999.0,
"name": "test-span",
"status": "ok",
"is_remote": False,
"is_segment": False,
}
# Set the field to None
span_data[field_to_set_none] = None
Expand Down Expand Up @@ -202,7 +202,7 @@ def add_commit(offsets, force=False):
"received": 1699999999.0,
"name": "test-span",
"status": "ok",
"is_remote": False,
"is_segment": False,
}

step.submit(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
###############################################

SPAN_KAFKA_MESSAGE: SpanEvent = {
"is_remote": True,
"is_segment": True,
"attributes": {
"http.status_code": {"value": "200", "type": "string"},
"my.array.field": {"value": [1, 2, ["nested", "array"]], "type": "array"},
Expand Down
6 changes: 3 additions & 3 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading