From 57511f6022356e3e3e0d4b88091664fd3699ab45 Mon Sep 17 00:00:00 2001 From: Peter Solnica Date: Fri, 28 Nov 2025 15:00:40 +0000 Subject: [PATCH 1/4] wip - add sentry-trace header propagator for DT --- lib/sentry/opentelemetry/propagator.ex | 153 ++++++++++ test/sentry/opentelemetry/propagator_test.exs | 284 ++++++++++++++++++ 2 files changed, 437 insertions(+) create mode 100644 lib/sentry/opentelemetry/propagator.ex create mode 100644 test/sentry/opentelemetry/propagator_test.exs diff --git a/lib/sentry/opentelemetry/propagator.ex b/lib/sentry/opentelemetry/propagator.ex new file mode 100644 index 00000000..2b126e0d --- /dev/null +++ b/lib/sentry/opentelemetry/propagator.ex @@ -0,0 +1,153 @@ +if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do + defmodule Sentry.OpenTelemetry.Propagator do + @moduledoc """ + OpenTelemetry propagator for Sentry distributed tracing. + + This propagator implements the `sentry-trace` and `sentry-baggage` header propagation + to enable distributed tracing across service boundaries. It follows the W3C Trace Context. + + ## Usage + + Add the propagator to your OpenTelemetry configuration: + + config :opentelemetry, + text_map_propagators: [ + :trace_context, + :baggage, + Sentry.OpenTelemetry.Propagator + ] + + This will enable automatic propagation of Sentry trace context in HTTP headers. + """ + + import Bitwise + + require Record + require OpenTelemetry.Tracer, as: Tracer + + @behaviour :otel_propagator_text_map + + @fields Record.extract(:span_ctx, from_lib: "opentelemetry_api/include/opentelemetry.hrl") + Record.defrecordp(:span_ctx, @fields) + + @sentry_trace_key "sentry-trace" + @sentry_baggage_key "baggage" + @sentry_trace_ctx_key :"sentry-trace" + @sentry_baggage_ctx_key :"sentry-baggage" + + @impl true + def fields(_opts) do + [@sentry_trace_key, @sentry_baggage_key] + end + + @impl true + def inject(ctx, carrier, setter, _opts) do + case Tracer.current_span_ctx(ctx) do + span_ctx(trace_id: tid, span_id: sid, trace_flags: flags) when tid != 0 and sid != 0 -> + sentry_trace_header = encode_sentry_trace({tid, sid, flags}) + carrier = setter.(@sentry_trace_key, sentry_trace_header, carrier) + + # Inject baggage if it exists in the context + # Note: :otel_ctx.get_value/2 returns the key itself if value not found + baggage_value = :otel_ctx.get_value(ctx, @sentry_baggage_ctx_key, :not_found) + + if is_binary(baggage_value) and baggage_value != :not_found do + setter.(@sentry_baggage_key, baggage_value, carrier) + else + carrier + end + + _ -> + carrier + end + end + + @impl true + def extract(ctx, carrier, _keys_fun, getter, _opts) do + case getter.(@sentry_trace_key, carrier) do + :undefined -> + ctx + + header when is_binary(header) -> + case decode_sentry_trace(header) do + {:ok, {trace_hex, span_hex, sampled}} -> + ctx = + ctx + |> :otel_ctx.set_value(@sentry_trace_ctx_key, {trace_hex, span_hex, sampled}) + |> maybe_set_baggage(getter.(@sentry_baggage_key, carrier)) + + trace_id = hex_to_int(trace_hex) + span_id = hex_to_int(span_hex) + + # Always use sampled (1) to simulate a sampled trace on the OTel side + trace_flags = 1 + + remote_span_ctx = + :otel_tracer.from_remote_span(trace_id, span_id, trace_flags) + + Tracer.set_current_span(ctx, remote_span_ctx) + + {:error, _reason} -> + # Invalid header format, skip propagation + ctx + end + + _ -> + ctx + end + end + + # Encode trace ID, span ID, and sampled flag to sentry-trace header format + # Format: {trace_id}-{span_id}-{sampled} + defp encode_sentry_trace({trace_id_int, span_id_int, trace_flags}) do + sampled = if (trace_flags &&& 1) == 1, do: "1", else: "0" + int_to_hex(trace_id_int, 16) <> "-" <> int_to_hex(span_id_int, 8) <> "-" <> sampled + end + + # Decode sentry-trace header + # Format: {trace_id}-{span_id}-{sampled} or {trace_id}-{span_id} + defp decode_sentry_trace( + <> + ) do + {:ok, {trace_hex, span_hex, sampled == "1"}} + end + + defp decode_sentry_trace(<>) do + {:ok, {trace_hex, span_hex, false}} + end + + defp decode_sentry_trace(_invalid) do + {:error, :invalid_format} + end + + defp maybe_set_baggage(ctx, :undefined), do: ctx + defp maybe_set_baggage(ctx, ""), do: ctx + defp maybe_set_baggage(ctx, nil), do: ctx + + defp maybe_set_baggage(ctx, baggage) when is_binary(baggage) do + :otel_ctx.set_value(ctx, @sentry_baggage_ctx_key, baggage) + end + + # Convert hex string to integer + defp hex_to_int(hex) do + hex + |> Base.decode16!(case: :mixed) + |> :binary.decode_unsigned() + end + + # Convert integer to hex string with padding + defp int_to_hex(value, num_bytes) do + value + |> :binary.encode_unsigned() + |> bin_pad_left(num_bytes) + |> Base.encode16(case: :lower) + end + + # Pad binary to specified number of bytes + defp bin_pad_left(bin, total_bytes) do + missing = total_bytes - byte_size(bin) + if missing > 0, do: :binary.copy(<<0>>, missing) <> bin, else: bin + end + end +end diff --git a/test/sentry/opentelemetry/propagator_test.exs b/test/sentry/opentelemetry/propagator_test.exs new file mode 100644 index 00000000..a1b38114 --- /dev/null +++ b/test/sentry/opentelemetry/propagator_test.exs @@ -0,0 +1,284 @@ +defmodule Sentry.OpenTelemetry.PropagatorTest do + use ExUnit.Case, async: true + + alias Sentry.OpenTelemetry.Propagator + + @moduletag skip: not Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() + + if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do + require OpenTelemetry.Tracer, as: Tracer + require Record + + @fields Record.extract(:span_ctx, from_lib: "opentelemetry_api/include/opentelemetry.hrl") + Record.defrecordp(:span_ctx, @fields) + + describe "fields/1" do + test "returns the header fields used by the propagator" do + assert Propagator.fields([]) == ["sentry-trace", "baggage"] + end + end + + describe "inject/4" do + test "injects sentry-trace header from current span context" do + trace_id = 0x1234567890ABCDEF1234567890ABCDEF + span_id = 0x1234567890ABCDEF + trace_flags = 1 + + span_context = + span_ctx( + trace_id: trace_id, + span_id: span_id, + trace_flags: trace_flags, + tracestate: [], + is_valid: true, + is_remote: false + ) + + ctx = Tracer.set_current_span(:otel_ctx.new(), span_context) + + setter = fn key, value, carrier -> + Map.put(carrier, key, value) + end + + carrier = Propagator.inject(ctx, %{}, setter, []) + + assert Map.has_key?(carrier, "sentry-trace") + sentry_trace = Map.get(carrier, "sentry-trace") + + # Verify format: {trace_id}-{span_id}-{sampled} + assert sentry_trace =~ ~r/^[0-9a-f]{32}-[0-9a-f]{16}-[01]$/ + assert String.ends_with?(sentry_trace, "-1") + end + + test "does not inject when no span context is present" do + ctx = :otel_ctx.new() + + setter = fn key, value, carrier -> + Map.put(carrier, key, value) + end + + carrier = Propagator.inject(ctx, %{}, setter, []) + + assert carrier == %{} + end + end + + describe "extract/5" do + test "extracts sentry-trace header and sets remote span context" do + sentry_trace_header = "1234567890abcdef1234567890abcdef-1234567890abcdef-1" + + getter = fn key, _carrier -> + case key do + "sentry-trace" -> sentry_trace_header + "baggage" -> :undefined + _ -> :undefined + end + end + + ctx = Propagator.extract(:otel_ctx.new(), %{}, nil, getter, []) + + # Verify that a remote span context was set + span_ctx = Tracer.current_span_ctx(ctx) + assert span_ctx != :undefined + + # Verify trace and span IDs were converted correctly + expected_trace_id = 0x1234567890ABCDEF1234567890ABCDEF + expected_span_id = 0x1234567890ABCDEF + + assert span_ctx(span_ctx, :trace_id) == expected_trace_id + assert span_ctx(span_ctx, :span_id) == expected_span_id + assert span_ctx(span_ctx, :trace_flags) == 1 + assert span_ctx(span_ctx, :is_remote) == true + end + + test "extracts sentry-trace without sampled flag" do + sentry_trace_header = "1234567890abcdef1234567890abcdef-1234567890abcdef" + + getter = fn key, _carrier -> + case key do + "sentry-trace" -> sentry_trace_header + _ -> :undefined + end + end + + ctx = Propagator.extract(:otel_ctx.new(), %{}, nil, getter, []) + + # Verify remote span context was set + span_ctx = Tracer.current_span_ctx(ctx) + assert span_ctx != :undefined + + # Even when not sampled in the header, trace_flags should be 1 + # because we always simulate sampled traces on the OTel side + assert span_ctx(span_ctx, :trace_flags) == 1 + assert span_ctx(span_ctx, :is_remote) == true + end + + test "handles missing sentry-trace header" do + getter = fn _key, _carrier -> :undefined end + + ctx = Propagator.extract(:otel_ctx.new(), %{}, nil, getter, []) + + # Context should be unchanged + assert Tracer.current_span_ctx(ctx) == :undefined + end + + test "handles invalid sentry-trace header format" do + invalid_headers = [ + "invalid", + "1234-5678", + "toolong1234567890abcdef1234567890abcdef-1234567890abcdef-1" + ] + + for invalid_header <- invalid_headers do + getter = fn key, _carrier -> + case key do + "sentry-trace" -> invalid_header + _ -> :undefined + end + end + + ctx = Propagator.extract(:otel_ctx.new(), %{}, nil, getter, []) + + # Invalid headers should be ignored - no remote span should be set + assert Tracer.current_span_ctx(ctx) == :undefined + end + end + + test "extracts and stores baggage header" do + sentry_trace_header = "1234567890abcdef1234567890abcdef-1234567890abcdef-1" + + baggage_header = + "sentry-trace_id=771a43a4192642f0b136d5159a501700," <> + "sentry-public_key=49d0f7386ad645858ae85020e393bef3," <> + "sentry-sample_rate=0.01337,sentry-user_id=Am%C3%A9lie" + + getter = fn key, _carrier -> + case key do + "sentry-trace" -> sentry_trace_header + "baggage" -> baggage_header + _ -> :undefined + end + end + + ctx = Propagator.extract(:otel_ctx.new(), %{}, nil, getter, []) + + # Verify baggage was stored in context + stored_baggage = :otel_ctx.get_value(ctx, :"sentry-baggage", :not_found) + assert stored_baggage == baggage_header + end + + test "handles missing baggage header" do + sentry_trace_header = "1234567890abcdef1234567890abcdef-1234567890abcdef-1" + + getter = fn key, _carrier -> + case key do + "sentry-trace" -> sentry_trace_header + _ -> :undefined + end + end + + ctx = Propagator.extract(:otel_ctx.new(), %{}, nil, getter, []) + + # Verify baggage is not set when not provided + stored_baggage = :otel_ctx.get_value(ctx, :"sentry-baggage", :not_found) + assert stored_baggage == :not_found + end + end + + describe "baggage propagation" do + test "injects baggage from context" do + trace_id = 0x1234567890ABCDEF1234567890ABCDEF + span_id = 0x1234567890ABCDEF + trace_flags = 1 + baggage_value = "sentry-trace_id=771a43a4192642f0b136d5159a501700,sentry-release=1.0.0" + + span_context = + span_ctx( + trace_id: trace_id, + span_id: span_id, + trace_flags: trace_flags, + tracestate: [], + is_valid: true, + is_remote: false + ) + + ctx = + :otel_ctx.new() + |> Tracer.set_current_span(span_context) + |> :otel_ctx.set_value(:"sentry-baggage", baggage_value) + + setter = fn key, value, carrier -> + Map.put(carrier, key, value) + end + + carrier = Propagator.inject(ctx, %{}, setter, []) + + # Verify both sentry-trace and baggage are injected + assert Map.has_key?(carrier, "sentry-trace") + assert Map.get(carrier, "baggage") == baggage_value + end + + test "does not inject baggage when not in context" do + trace_id = 0x1234567890ABCDEF1234567890ABCDEF + span_id = 0x1234567890ABCDEF + trace_flags = 1 + + span_context = + span_ctx( + trace_id: trace_id, + span_id: span_id, + trace_flags: trace_flags, + tracestate: [], + is_valid: true, + is_remote: false + ) + + ctx = Tracer.set_current_span(:otel_ctx.new(), span_context) + + setter = fn key, value, carrier -> + Map.put(carrier, key, value) + end + + carrier = Propagator.inject(ctx, %{}, setter, []) + + # Verify only sentry-trace is injected + assert Map.has_key?(carrier, "sentry-trace") + assert not Map.has_key?(carrier, "baggage") + end + end + + describe "integration with OpenTelemetry" do + test "round-trip inject and extract preserves trace context" do + # Start a span to create a trace context + Tracer.with_span "test_span" do + ctx = :otel_ctx.get_current() + span_ctx = Tracer.current_span_ctx(ctx) + + original_trace_id = span_ctx(span_ctx, :trace_id) + original_span_id = span_ctx(span_ctx, :span_id) + + # Inject into carrier + setter = fn key, value, carrier -> + Map.put(carrier, key, value) + end + + carrier = Propagator.inject(ctx, %{}, setter, []) + + # Extract from carrier + getter = fn key, carrier -> + Map.get(carrier, key, :undefined) + end + + new_ctx = Propagator.extract(:otel_ctx.new(), carrier, nil, getter, []) + new_span_ctx = Tracer.current_span_ctx(new_ctx) + + # Trace ID should be preserved + assert span_ctx(new_span_ctx, :trace_id) == original_trace_id + + # The span ID in the new context becomes the parent span ID + assert span_ctx(new_span_ctx, :span_id) == original_span_id + end + end + end + end +end From 4d84afc4c7c5ede06dcddb073120af3cbba1ddde Mon Sep 17 00:00:00 2001 From: Peter Solnica Date: Fri, 28 Nov 2025 15:03:30 +0000 Subject: [PATCH 2/4] wip - update SpanProcessor for DT --- lib/sentry/opentelemetry/span_processor.ex | 191 ++++++- .../opentelemetry/span_processor_test.exs | 524 +++++++++++++++++- 2 files changed, 686 insertions(+), 29 deletions(-) diff --git a/lib/sentry/opentelemetry/span_processor.ex b/lib/sentry/opentelemetry/span_processor.ex index c0f81203..17227258 100644 --- a/lib/sentry/opentelemetry/span_processor.ex +++ b/lib/sentry/opentelemetry/span_processor.ex @@ -11,48 +11,149 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do require OpenTelemetry.SemConv.Incubating.MessagingAttributes, as: MessagingAttributes require Logger + require Record alias Sentry.{Transaction, OpenTelemetry.SpanStorage, OpenTelemetry.SpanRecord} alias Sentry.Interfaces.Span - # This can be a no-op since we can postpone inserting the span into storage until on_end + # Extract span record fields to access parent_span_id in on_start + @span_fields Record.extract(:span, from_lib: "opentelemetry/include/otel_span.hrl") + Record.defrecordp(:span, @span_fields) + @impl :otel_span_processor def on_start(_ctx, otel_span, _config) do + # Track pending children: when a span starts with a parent, register it + # as a pending child. This allows us to wait for all children when + # the parent ends, solving the race condition where parent.on_end + # is called before child.on_end. + parent_span_id = span(otel_span, :parent_span_id) + span_id = span(otel_span, :span_id) + + if parent_span_id != nil and parent_span_id != :undefined do + parent_span_id_str = cast_span_id(parent_span_id) + span_id_str = cast_span_id(span_id) + + if parent_span_id_str != nil and span_id_str != nil do + SpanStorage.store_pending_child(parent_span_id_str, span_id_str) + end + end + otel_span end @impl :otel_span_processor def on_end(otel_span, _config) do span_record = SpanRecord.new(otel_span) + process_span(span_record) + end + defp process_span(span_record) do SpanStorage.store_span(span_record) - if span_record.parent_span_id == nil do - child_span_records = SpanStorage.get_child_spans(span_record.span_id) - transaction = build_transaction(span_record, child_span_records) + # Remove this span from pending children of its parent (if any) + # This must happen after store_span so the span is available when + # the parent builds its transaction + _ = + if span_record.parent_span_id != nil do + SpanStorage.remove_pending_child(span_record.parent_span_id, span_record.span_id) - result = - case Sentry.send_transaction(transaction) do - {:ok, _id} -> - true + # Check if our parent was waiting for us (and possibly other siblings) + # If parent has ended and we were the last pending child, build the transaction now + maybe_complete_waiting_parent(span_record.parent_span_id) + end - :ignored -> - true + # Check if this is a root span (no parent) or a transaction root + # + # A span should be a transaction root if: + # 1. It has no parent (true root span) + # 2. OR it's a server span with only a REMOTE parent (distributed tracing) + # + # A span should NOT be a transaction root if: + # - It has a LOCAL parent (parent span exists in our SpanStorage) + is_transaction_root = + cond do + # No parent = definitely a root + span_record.parent_span_id == nil -> + true + + # Has a parent - check if it's local or remote + true -> + has_local_parent = has_local_parent_span?(span_record.parent_span_id) + + if has_local_parent do + # Parent exists locally - this is a child span, not a transaction root + false + else + # Parent is remote (distributed tracing) - treat server spans as transaction roots + is_server_span?(span_record) + end + end - :excluded -> - true + if is_transaction_root do + # Check if we have pending children that haven't ended yet + if SpanStorage.has_pending_children?(span_record.span_id) do + # Store as waiting parent - transaction will be built when last child ends + SpanStorage.store_waiting_parent(span_record) + true + else + # No pending children, build and send transaction immediately + build_and_send_transaction(span_record) + end + else + true + end + end - {:error, error} -> - Logger.warning("Failed to send transaction to Sentry: #{inspect(error)}") - {:error, :invalid_span} + # Called when a child span ends to check if its parent was waiting for children + defp maybe_complete_waiting_parent(parent_span_id) do + case SpanStorage.get_waiting_parent(parent_span_id) do + nil -> + # Parent hasn't ended yet or wasn't a transaction root - nothing to do + :ok + + parent_span_record -> + # Parent was waiting. Check if all children are done now. + unless SpanStorage.has_pending_children?(parent_span_id) do + # All children done! Build and send the transaction. + SpanStorage.remove_waiting_parent(parent_span_id) + build_and_send_transaction(parent_span_record) end + end + end - :ok = SpanStorage.remove_root_span(span_record.span_id) + defp build_and_send_transaction(span_record) do + child_span_records = SpanStorage.get_child_spans(span_record.span_id) + transaction = build_transaction(span_record, child_span_records) - result - else - true + result = + case Sentry.send_transaction(transaction) do + {:ok, _id} -> + true + + :ignored -> + true + + :excluded -> + true + + {:error, error} -> + Logger.warning("Failed to send transaction to Sentry: #{inspect(error)}") + {:error, :invalid_span} + end + + # Clean up: remove the transaction root span and all its children + :ok = SpanStorage.remove_root_span(span_record.span_id) + + # Also clean up any remaining pending children records (shouldn't be any, but be safe) + :ok = SpanStorage.remove_pending_children(span_record.span_id) + + if span_record.parent_span_id != nil do + # This span was stored as a child because it has a remote parent (distributed tracing). + # We need to explicitly remove it from the child spans storage. + :ok = SpanStorage.remove_child_span(span_record.parent_span_id, span_record.span_id) end + + result end @impl :otel_span_processor @@ -60,6 +161,24 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do :ok end + # Checks if a parent span exists in our local SpanStorage + # This helps distinguish between: + # - Local parents: span exists in storage (same service) + # - Remote parents: span doesn't exist in storage (distributed tracing from another service) + defp has_local_parent_span?(parent_span_id) do + SpanStorage.span_exists?(parent_span_id) + end + + # Helper function to detect if a span is a server span that should be + # treated as a transaction root for distributed tracing. + # This includes HTTP server request spans (have http.request.method attribute) + defp is_server_span?(%{kind: :server, attributes: attributes}) do + # Check if it's an HTTP server request span (has http.request.method) + Map.has_key?(attributes, to_string(HTTPAttributes.http_request_method())) + end + + defp is_server_span?(_), do: false + defp build_transaction(root_span_record, child_span_records) do root_span = build_span(root_span_record) child_spans = Enum.map(child_span_records, &build_span(&1)) @@ -112,12 +231,27 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do client_address = Map.get(span_record.attributes, to_string(ClientAttributes.client_address())) - url_path = Map.get(span_record.attributes, to_string(URLAttributes.url_path())) + # Try multiple attributes for the URL path + url_path = + Map.get(span_record.attributes, to_string(URLAttributes.url_path())) || + Map.get(span_record.attributes, "url.full") || + Map.get(span_record.attributes, "http.target") || + Map.get(span_record.attributes, "http.route") || + span_record.name + # Build description with method and path description = - to_string(http_request_method) <> - ((client_address && " from #{client_address}") || "") <> - ((url_path && " #{url_path}") || "") + case url_path do + nil -> to_string(http_request_method) + path -> "#{http_request_method} #{path}" + end + + description = + if client_address do + "#{description} from #{client_address}" + else + description + end {op, description} end @@ -207,5 +341,16 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do end) |> Map.new() end + + # Helper to convert span ID from integer to hex string (used in on_start) + defp cast_span_id(nil), do: nil + defp cast_span_id(:undefined), do: nil + + defp cast_span_id(span_id) do + case :otel_utils.format_binary_string("~16.16.0b", [span_id]) do + {:ok, result} -> result + {:error, _} -> nil + end + end end end diff --git a/test/sentry/opentelemetry/span_processor_test.exs b/test/sentry/opentelemetry/span_processor_test.exs index eaa76f39..5b45bc17 100644 --- a/test/sentry/opentelemetry/span_processor_test.exs +++ b/test/sentry/opentelemetry/span_processor_test.exs @@ -1,6 +1,10 @@ defmodule Sentry.Opentelemetry.SpanProcessorTest do use Sentry.Case, async: false + require OpenTelemetry.Tracer, as: Tracer + require OpenTelemetry.SemConv.Incubating.HTTPAttributes, as: HTTPAttributes + require OpenTelemetry.SemConv.Incubating.URLAttributes, as: URLAttributes + import Sentry.TestHelpers alias Sentry.OpenTelemetry.SpanStorage @@ -188,8 +192,6 @@ defmodule Sentry.Opentelemetry.SpanProcessorTest do Sentry.Test.start_collecting_sentry_reports() - require OpenTelemetry.Tracer, as: Tracer - Tracer.with_span "root_span" do Tracer.with_span "level_1_child" do Tracer.with_span "level_2_child" do @@ -247,8 +249,6 @@ defmodule Sentry.Opentelemetry.SpanProcessorTest do tasks = Enum.map(1..20, fn i -> Task.async(fn -> - require OpenTelemetry.Tracer, as: Tracer - Tracer.with_span "concurrent_root_#{i}" do Tracer.with_span "concurrent_child_#{i}" do Process.sleep(10) @@ -286,8 +286,6 @@ defmodule Sentry.Opentelemetry.SpanProcessorTest do Sentry.Test.start_collecting_sentry_reports() - require OpenTelemetry.Tracer, as: Tracer - Tracer.with_span "root_span" do Tracer.with_span "child_instrumented_function_one" do Process.sleep(10) @@ -310,5 +308,519 @@ defmodule Sentry.Opentelemetry.SpanProcessorTest do Application.put_env(:opentelemetry, :sampler, original_sampler) end + + @tag span_storage: true + test "treats HTTP server request spans as transaction roots for distributed tracing" do + put_test_config(environment_name: "test", traces_sample_rate: 1.0) + + Sentry.Test.start_collecting_sentry_reports() + + # Simulate an incoming HTTP request with an external parent span ID (from browser/client) + # This represents a distributed trace where the client started the trace + external_trace_id = 0x1234567890ABCDEF1234567890ABCDEF + external_parent_span_id = 0xABCDEF1234567890 + + # Create a remote parent span context using :otel_tracer.from_remote_span + remote_parent = :otel_tracer.from_remote_span(external_trace_id, external_parent_span_id, 1) + + ctx = Tracer.set_current_span(:otel_ctx.new(), remote_parent) + + # Start an HTTP server span with the remote parent context + Tracer.with_span ctx, "POST /api/users", %{ + kind: :server, + attributes: %{ + HTTPAttributes.http_request_method() => :POST, + URLAttributes.url_path() => "/api/users", + "http.route" => "/api/users", + "server.address" => "localhost", + "server.port" => 4000 + } + } do + # Simulate child spans (database queries, etc.) + Tracer.with_span "db.query:users", %{ + kind: :client, + attributes: %{ + "db.system" => :postgresql, + "db.statement" => "INSERT INTO users (name) VALUES ($1)" + } + } do + Process.sleep(10) + end + + Tracer.with_span "db.query:notifications", %{ + kind: :client, + attributes: %{ + "db.system" => :postgresql, + "db.statement" => "INSERT INTO notifications (user_id) VALUES ($1)" + } + } do + Process.sleep(10) + end + end + + # Should capture the HTTP request span as a transaction root despite having an external parent + assert [%Sentry.Transaction{} = transaction] = Sentry.Test.pop_sentry_transactions() + + # Verify transaction properties + assert transaction.transaction == "POST /api/users" + assert transaction.transaction_info == %{source: :custom} + assert length(transaction.spans) == 2 + + # Verify child spans are properly included + span_ops = Enum.map(transaction.spans, & &1.op) |> Enum.sort() + assert span_ops == ["db", "db"] + + # Verify child spans have detailed data (like SQL queries) + [span1, span2] = transaction.spans + assert span1.description =~ "INSERT INTO" + assert span2.description =~ "INSERT INTO" + assert span1.data["db.system"] == :postgresql + assert span2.data["db.system"] == :postgresql + assert span1.data["db.statement"] =~ "INSERT INTO users" + assert span2.data["db.statement"] =~ "INSERT INTO notifications" + + # Verify all spans share the same trace ID (from the external parent) + trace_id = transaction.contexts.trace.trace_id + + Enum.each(transaction.spans, fn span -> + assert span.trace_id == trace_id + end) + + # The transaction should have the external parent's trace ID + assert transaction.contexts.trace.trace_id == + "1234567890abcdef1234567890abcdef" + end + + @tag span_storage: true + test "cleans up HTTP server span and children after sending distributed trace transaction", %{ + table_name: table_name + } do + put_test_config(environment_name: "test", traces_sample_rate: 1.0) + + Sentry.Test.start_collecting_sentry_reports() + + # Simulate an incoming HTTP request with an external parent span ID (from browser/client) + external_trace_id = 0x1234567890ABCDEF1234567890ABCDEF + external_parent_span_id = 0xABCDEF1234567890 + + remote_parent = :otel_tracer.from_remote_span(external_trace_id, external_parent_span_id, 1) + ctx = Tracer.set_current_span(:otel_ctx.new(), remote_parent) + + # Start an HTTP server span with the remote parent context + Tracer.with_span ctx, "POST /api/users", %{ + kind: :server, + attributes: %{ + HTTPAttributes.http_request_method() => :POST, + URLAttributes.url_path() => "/api/users" + } + } do + # Simulate child spans (database queries, etc.) + Tracer.with_span "db.query:users", %{ + kind: :client, + attributes: %{ + "db.system" => :postgresql, + "db.statement" => "INSERT INTO users (name) VALUES ($1)" + } + } do + Process.sleep(10) + end + end + + # Should capture the HTTP request span as a transaction + assert [%Sentry.Transaction{} = transaction] = Sentry.Test.pop_sentry_transactions() + + # Verify the HTTP server span was removed from storage + # (even though it was stored as a child span due to having a remote parent) + http_server_span_id = transaction.contexts.trace.span_id + remote_parent_span_id_str = "abcdef1234567890" + + # The HTTP server span should not exist in storage anymore + assert SpanStorage.get_root_span(http_server_span_id, table_name: table_name) == nil + + # Check that it was also removed from child spans storage + # We can't directly check if a specific child was removed, but we can verify + # that get_child_spans for the remote parent returns empty (or doesn't include our span) + remaining_children = + SpanStorage.get_child_spans(remote_parent_span_id_str, table_name: table_name) + + refute Enum.any?(remaining_children, fn span -> span.span_id == http_server_span_id end) + + # Verify child spans of the HTTP server span were also removed + assert [] == SpanStorage.get_child_spans(http_server_span_id, table_name: table_name) + end + end + + describe "parent-child span ordering (race condition)" do + # These tests verify that child spans are included in transactions even when + # the parent span's on_end is called before the child span's on_end. + # This simulates what happens with telemetry-based instrumentation like + # opentelemetry_phoenix + opentelemetry_ecto where the order of on_end calls + # is not guaranteed. + + @tag span_storage: true + test "includes child spans when parent ends before child (race condition)" do + put_test_config(environment_name: "test", traces_sample_rate: 1.0) + + Sentry.Test.start_collecting_sentry_reports() + + # We need to simulate the race condition where: + # 1. Parent span starts + # 2. Child span starts + # 3. Parent span ends (on_end called) - but child is still running + # 4. Child span ends (on_end called) - after parent already ended + # + # This happens with telemetry-based instrumentation because: + # - Phoenix LiveView mount span ends when the mount callback returns + # - Ecto query span ends when the query completes + # - But with async telemetry events, the order of on_end calls isn't guaranteed + + test_pid = self() + + # Start parent span and get its context + parent_task = + Task.async(fn -> + Tracer.with_span "parent_span", %{kind: :internal} do + # Get parent span ID to pass to child + parent_span_ctx = Tracer.current_span_ctx() + parent_ctx = :otel_ctx.get_current() + + # Send context to main process + send(test_pid, {:parent_ctx, parent_ctx, parent_span_ctx}) + + # Wait for child to start before parent ends + receive do + :child_started -> :ok + after + 5000 -> raise "Timeout waiting for child_started" + end + + # Parent span ends here (when with_span block exits) + # but child is still running + end + + # Signal that parent has ended + send(test_pid, :parent_ended) + end) + + # Receive parent context + parent_ctx = + receive do + {:parent_ctx, ctx, _span_ctx} -> ctx + after + 5000 -> raise "Timeout waiting for parent context" + end + + # Start child span in a separate process with parent context + child_task = + Task.async(fn -> + # Attach parent context to this process + :otel_ctx.attach(parent_ctx) + + Tracer.with_span "child_span_delayed", %{kind: :internal} do + # Signal that child has started + send(test_pid, :child_started) + + # Wait for parent to end first + receive do + :parent_ended -> :ok + after + 5000 -> raise "Timeout waiting for parent_ended" + end + + # Add a small delay to ensure parent's on_end has been processed + Process.sleep(50) + + # Child span ends here (after parent has already ended) + end + end) + + # Wait for child to start + receive do + :child_started -> :ok + after + 5000 -> raise "Timeout waiting for child_started signal" + end + + # Signal parent to end + send(parent_task.pid, :child_started) + + # Wait for parent to end + receive do + :parent_ended -> :ok + after + 5000 -> raise "Timeout waiting for parent_ended signal" + end + + # Signal child to end (after parent has ended) + send(child_task.pid, :parent_ended) + + Task.await(parent_task) + Task.await(child_task) + + # Give some time for async processing + Process.sleep(100) + + # The transaction should include the child span + transactions = Sentry.Test.pop_sentry_transactions() + + assert length(transactions) == 1, "Expected 1 transaction, got #{length(transactions)}" + [transaction] = transactions + + assert transaction.transaction == "parent_span" + + # THIS IS THE KEY ASSERTION: The child span should be included + # even though the parent's on_end was called before the child's on_end + assert length(transaction.spans) == 1, + "Expected 1 child span, got #{length(transaction.spans)}. " <> + "This fails due to the race condition where parent ends before child." + + [child_span] = transaction.spans + assert child_span.op == "child_span_delayed" + assert child_span.parent_span_id == transaction.span_id + end + + @tag span_storage: true + test "includes multiple child spans when parent ends before children (race condition)" do + put_test_config(environment_name: "test", traces_sample_rate: 1.0) + + Sentry.Test.start_collecting_sentry_reports() + + # Similar test but with multiple children ending at different times after the parent + test_pid = self() + + parent_task = + Task.async(fn -> + Tracer.with_span "parent_with_multiple_children", %{kind: :internal} do + parent_ctx = :otel_ctx.get_current() + send(test_pid, {:parent_ctx, parent_ctx}) + + # Wait for both children to start + receive do + :children_started -> :ok + after + 5000 -> raise "Timeout" + end + + # Parent ends here + end + + send(test_pid, :parent_ended) + end) + + parent_ctx = + receive do + {:parent_ctx, ctx} -> ctx + after + 5000 -> raise "Timeout" + end + + # Start two child spans in separate processes + child1_task = + Task.async(fn -> + :otel_ctx.attach(parent_ctx) + + Tracer.with_span "child_1", %{kind: :internal} do + send(test_pid, :child1_started) + + receive do + :end_child1 -> :ok + after + 5000 -> raise "Timeout" + end + end + end) + + child2_task = + Task.async(fn -> + :otel_ctx.attach(parent_ctx) + + Tracer.with_span "child_2", %{kind: :internal} do + send(test_pid, :child2_started) + + receive do + :end_child2 -> :ok + after + 5000 -> raise "Timeout" + end + end + end) + + # Wait for both children to start + receive do + :child1_started -> :ok + after + 5000 -> raise "Timeout" + end + + receive do + :child2_started -> :ok + after + 5000 -> raise "Timeout" + end + + # Signal parent that children have started + send(parent_task.pid, :children_started) + + # Wait for parent to end + receive do + :parent_ended -> :ok + after + 5000 -> raise "Timeout" + end + + # End children in sequence (both after parent) + Process.sleep(50) + send(child1_task.pid, :end_child1) + Process.sleep(20) + send(child2_task.pid, :end_child2) + + Task.await(parent_task) + Task.await(child1_task) + Task.await(child2_task) + + Process.sleep(100) + + transactions = Sentry.Test.pop_sentry_transactions() + + assert length(transactions) == 1, "Expected 1 transaction, got #{length(transactions)}" + [transaction] = transactions + + assert transaction.transaction == "parent_with_multiple_children" + + # Both child spans should be included + assert length(transaction.spans) == 2, + "Expected 2 child spans, got #{length(transaction.spans)}. " <> + "This fails due to the race condition." + + span_ops = Enum.map(transaction.spans, & &1.op) |> Enum.sort() + assert span_ops == ["child_1", "child_2"] + end + + @tag span_storage: true + test "includes child spans created with start_span/end_span (ecto pattern)" do + # This test mimics the exact pattern used by opentelemetry_ecto: + # - Parent span uses with_span (like LiveView mount) + # - Child span uses start_span/end_span directly within the same process + # (like Ecto telemetry handler) + # + # The key difference is that start_span/end_span doesn't nest like with_span, + # so we need to manually attach the parent context. + + put_test_config(environment_name: "test", traces_sample_rate: 1.0) + + Sentry.Test.start_collecting_sentry_reports() + + test_pid = self() + + # This simulates what happens in LiveView + Ecto: + # 1. LiveView mount span starts (via opentelemetry_phoenix telemetry) + # 2. Ecto query runs, which creates a child span via telemetry + # 3. LiveView mount span ends + parent_task = + Task.async(fn -> + Tracer.with_span "LiveView.mount", %{kind: :server} do + # Get the current context to pass to child + parent_ctx = :otel_ctx.get_current() + send(test_pid, {:parent_ctx, parent_ctx}) + + # Wait for child to start + receive do + :child_started -> :ok + after + 5000 -> raise "Timeout" + end + + # Parent ends here, but child may still be running + end + + send(test_pid, :parent_ended) + end) + + # Get parent context + parent_ctx = + receive do + {:parent_ctx, ctx} -> ctx + after + 5000 -> raise "Timeout" + end + + # Simulate Ecto span creation (using start_span/end_span like opentelemetry_ecto does) + child_task = + Task.async(fn -> + # Attach parent context (this is what opentelemetry_process_propagator does) + # opentelemetry_ecto uses OpenTelemetry.Ctx.attach which returns a token + parent_token = OpenTelemetry.Ctx.attach(parent_ctx) + + # Create child span using start_span (not with_span) + # This is exactly what opentelemetry_ecto does + child_span = + OpenTelemetry.Tracer.start_span("Ecto.query", %{ + kind: :client, + attributes: %{ + "db.system" => :postgresql, + "db.statement" => "SELECT * FROM users" + } + }) + + send(test_pid, :child_started) + + # Wait for parent to end first (simulating the race condition) + receive do + :parent_ended -> :ok + after + 5000 -> raise "Timeout" + end + + Process.sleep(50) + + # End the child span (after parent has ended) + # opentelemetry_ecto uses OpenTelemetry.Span.end_span/1, not Tracer.end_span/1 + OpenTelemetry.Span.end_span(child_span) + + # Detach the context token (cleanup) + OpenTelemetry.Ctx.detach(parent_token) + end) + + # Wait for child to start + receive do + :child_started -> :ok + after + 5000 -> raise "Timeout" + end + + # Signal parent to end + send(parent_task.pid, :child_started) + + # Wait for parent to end + receive do + :parent_ended -> :ok + after + 5000 -> raise "Timeout" + end + + # Signal child that parent has ended + send(child_task.pid, :parent_ended) + + Task.await(parent_task) + Task.await(child_task) + + Process.sleep(100) + + transactions = Sentry.Test.pop_sentry_transactions() + + assert length(transactions) == 1, "Expected 1 transaction, got #{length(transactions)}" + [transaction] = transactions + + assert transaction.transaction == "LiveView.mount" + + # The child span should be included even with start_span/end_span pattern + assert length(transaction.spans) == 1, + "Expected 1 child span (Ecto query), got #{length(transaction.spans)}. " <> + "This tests the opentelemetry_ecto pattern." + + [child_span] = transaction.spans + assert child_span.op == "db" + assert child_span.description == "SELECT * FROM users" + end end end From 46a4972d5e4672f141d3aa51a6cda89474c3e0b8 Mon Sep 17 00:00:00 2001 From: Peter Solnica Date: Mon, 1 Dec 2025 14:22:18 +0000 Subject: [PATCH 3/4] wip - clean up after narrowing down the scope --- lib/sentry/opentelemetry/span_processor.ex | 75 +--- lib/sentry/opentelemetry/span_storage.ex | 14 + .../opentelemetry/span_processor_test.exs | 374 ------------------ 3 files changed, 15 insertions(+), 448 deletions(-) diff --git a/lib/sentry/opentelemetry/span_processor.ex b/lib/sentry/opentelemetry/span_processor.ex index 17227258..3ab79718 100644 --- a/lib/sentry/opentelemetry/span_processor.ex +++ b/lib/sentry/opentelemetry/span_processor.ex @@ -22,22 +22,6 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do @impl :otel_span_processor def on_start(_ctx, otel_span, _config) do - # Track pending children: when a span starts with a parent, register it - # as a pending child. This allows us to wait for all children when - # the parent ends, solving the race condition where parent.on_end - # is called before child.on_end. - parent_span_id = span(otel_span, :parent_span_id) - span_id = span(otel_span, :span_id) - - if parent_span_id != nil and parent_span_id != :undefined do - parent_span_id_str = cast_span_id(parent_span_id) - span_id_str = cast_span_id(span_id) - - if parent_span_id_str != nil and span_id_str != nil do - SpanStorage.store_pending_child(parent_span_id_str, span_id_str) - end - end - otel_span end @@ -50,18 +34,6 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do defp process_span(span_record) do SpanStorage.store_span(span_record) - # Remove this span from pending children of its parent (if any) - # This must happen after store_span so the span is available when - # the parent builds its transaction - _ = - if span_record.parent_span_id != nil do - SpanStorage.remove_pending_child(span_record.parent_span_id, span_record.span_id) - - # Check if our parent was waiting for us (and possibly other siblings) - # If parent has ended and we were the last pending child, build the transaction now - maybe_complete_waiting_parent(span_record.parent_span_id) - end - # Check if this is a root span (no parent) or a transaction root # # A span should be a transaction root if: @@ -90,37 +62,12 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do end if is_transaction_root do - # Check if we have pending children that haven't ended yet - if SpanStorage.has_pending_children?(span_record.span_id) do - # Store as waiting parent - transaction will be built when last child ends - SpanStorage.store_waiting_parent(span_record) - true - else - # No pending children, build and send transaction immediately - build_and_send_transaction(span_record) - end + build_and_send_transaction(span_record) else true end end - # Called when a child span ends to check if its parent was waiting for children - defp maybe_complete_waiting_parent(parent_span_id) do - case SpanStorage.get_waiting_parent(parent_span_id) do - nil -> - # Parent hasn't ended yet or wasn't a transaction root - nothing to do - :ok - - parent_span_record -> - # Parent was waiting. Check if all children are done now. - unless SpanStorage.has_pending_children?(parent_span_id) do - # All children done! Build and send the transaction. - SpanStorage.remove_waiting_parent(parent_span_id) - build_and_send_transaction(parent_span_record) - end - end - end - defp build_and_send_transaction(span_record) do child_span_records = SpanStorage.get_child_spans(span_record.span_id) transaction = build_transaction(span_record, child_span_records) @@ -144,15 +91,6 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do # Clean up: remove the transaction root span and all its children :ok = SpanStorage.remove_root_span(span_record.span_id) - # Also clean up any remaining pending children records (shouldn't be any, but be safe) - :ok = SpanStorage.remove_pending_children(span_record.span_id) - - if span_record.parent_span_id != nil do - # This span was stored as a child because it has a remote parent (distributed tracing). - # We need to explicitly remove it from the child spans storage. - :ok = SpanStorage.remove_child_span(span_record.parent_span_id, span_record.span_id) - end - result end @@ -341,16 +279,5 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do end) |> Map.new() end - - # Helper to convert span ID from integer to hex string (used in on_start) - defp cast_span_id(nil), do: nil - defp cast_span_id(:undefined), do: nil - - defp cast_span_id(span_id) do - case :otel_utils.format_binary_string("~16.16.0b", [span_id]) do - {:ok, result} -> result - {:error, _} -> nil - end - end end end diff --git a/lib/sentry/opentelemetry/span_storage.ex b/lib/sentry/opentelemetry/span_storage.ex index 4b69b936..c8e977cb 100644 --- a/lib/sentry/opentelemetry/span_storage.ex +++ b/lib/sentry/opentelemetry/span_storage.ex @@ -37,6 +37,20 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do {:noreply, state} end + @spec span_exists?(String.t(), keyword()) :: boolean() + def span_exists?(span_id, opts \\ []) do + table_name = Keyword.get(opts, :table_name, default_table_name()) + + case :ets.lookup(table_name, {:root_span, span_id}) do + [{{:root_span, ^span_id}, _span, _stored_at}] -> true + [] -> + case :ets.match_object(table_name, {{:child_span, :_, span_id}, :_, :_}) do + [] -> false + _ -> true + end + end + end + @spec store_span(SpanRecord.t(), keyword()) :: true def store_span(span_data, opts \\ []) do table_name = Keyword.get(opts, :table_name, default_table_name()) diff --git a/test/sentry/opentelemetry/span_processor_test.exs b/test/sentry/opentelemetry/span_processor_test.exs index 5b45bc17..abd4ca8d 100644 --- a/test/sentry/opentelemetry/span_processor_test.exs +++ b/test/sentry/opentelemetry/span_processor_test.exs @@ -449,378 +449,4 @@ defmodule Sentry.Opentelemetry.SpanProcessorTest do assert [] == SpanStorage.get_child_spans(http_server_span_id, table_name: table_name) end end - - describe "parent-child span ordering (race condition)" do - # These tests verify that child spans are included in transactions even when - # the parent span's on_end is called before the child span's on_end. - # This simulates what happens with telemetry-based instrumentation like - # opentelemetry_phoenix + opentelemetry_ecto where the order of on_end calls - # is not guaranteed. - - @tag span_storage: true - test "includes child spans when parent ends before child (race condition)" do - put_test_config(environment_name: "test", traces_sample_rate: 1.0) - - Sentry.Test.start_collecting_sentry_reports() - - # We need to simulate the race condition where: - # 1. Parent span starts - # 2. Child span starts - # 3. Parent span ends (on_end called) - but child is still running - # 4. Child span ends (on_end called) - after parent already ended - # - # This happens with telemetry-based instrumentation because: - # - Phoenix LiveView mount span ends when the mount callback returns - # - Ecto query span ends when the query completes - # - But with async telemetry events, the order of on_end calls isn't guaranteed - - test_pid = self() - - # Start parent span and get its context - parent_task = - Task.async(fn -> - Tracer.with_span "parent_span", %{kind: :internal} do - # Get parent span ID to pass to child - parent_span_ctx = Tracer.current_span_ctx() - parent_ctx = :otel_ctx.get_current() - - # Send context to main process - send(test_pid, {:parent_ctx, parent_ctx, parent_span_ctx}) - - # Wait for child to start before parent ends - receive do - :child_started -> :ok - after - 5000 -> raise "Timeout waiting for child_started" - end - - # Parent span ends here (when with_span block exits) - # but child is still running - end - - # Signal that parent has ended - send(test_pid, :parent_ended) - end) - - # Receive parent context - parent_ctx = - receive do - {:parent_ctx, ctx, _span_ctx} -> ctx - after - 5000 -> raise "Timeout waiting for parent context" - end - - # Start child span in a separate process with parent context - child_task = - Task.async(fn -> - # Attach parent context to this process - :otel_ctx.attach(parent_ctx) - - Tracer.with_span "child_span_delayed", %{kind: :internal} do - # Signal that child has started - send(test_pid, :child_started) - - # Wait for parent to end first - receive do - :parent_ended -> :ok - after - 5000 -> raise "Timeout waiting for parent_ended" - end - - # Add a small delay to ensure parent's on_end has been processed - Process.sleep(50) - - # Child span ends here (after parent has already ended) - end - end) - - # Wait for child to start - receive do - :child_started -> :ok - after - 5000 -> raise "Timeout waiting for child_started signal" - end - - # Signal parent to end - send(parent_task.pid, :child_started) - - # Wait for parent to end - receive do - :parent_ended -> :ok - after - 5000 -> raise "Timeout waiting for parent_ended signal" - end - - # Signal child to end (after parent has ended) - send(child_task.pid, :parent_ended) - - Task.await(parent_task) - Task.await(child_task) - - # Give some time for async processing - Process.sleep(100) - - # The transaction should include the child span - transactions = Sentry.Test.pop_sentry_transactions() - - assert length(transactions) == 1, "Expected 1 transaction, got #{length(transactions)}" - [transaction] = transactions - - assert transaction.transaction == "parent_span" - - # THIS IS THE KEY ASSERTION: The child span should be included - # even though the parent's on_end was called before the child's on_end - assert length(transaction.spans) == 1, - "Expected 1 child span, got #{length(transaction.spans)}. " <> - "This fails due to the race condition where parent ends before child." - - [child_span] = transaction.spans - assert child_span.op == "child_span_delayed" - assert child_span.parent_span_id == transaction.span_id - end - - @tag span_storage: true - test "includes multiple child spans when parent ends before children (race condition)" do - put_test_config(environment_name: "test", traces_sample_rate: 1.0) - - Sentry.Test.start_collecting_sentry_reports() - - # Similar test but with multiple children ending at different times after the parent - test_pid = self() - - parent_task = - Task.async(fn -> - Tracer.with_span "parent_with_multiple_children", %{kind: :internal} do - parent_ctx = :otel_ctx.get_current() - send(test_pid, {:parent_ctx, parent_ctx}) - - # Wait for both children to start - receive do - :children_started -> :ok - after - 5000 -> raise "Timeout" - end - - # Parent ends here - end - - send(test_pid, :parent_ended) - end) - - parent_ctx = - receive do - {:parent_ctx, ctx} -> ctx - after - 5000 -> raise "Timeout" - end - - # Start two child spans in separate processes - child1_task = - Task.async(fn -> - :otel_ctx.attach(parent_ctx) - - Tracer.with_span "child_1", %{kind: :internal} do - send(test_pid, :child1_started) - - receive do - :end_child1 -> :ok - after - 5000 -> raise "Timeout" - end - end - end) - - child2_task = - Task.async(fn -> - :otel_ctx.attach(parent_ctx) - - Tracer.with_span "child_2", %{kind: :internal} do - send(test_pid, :child2_started) - - receive do - :end_child2 -> :ok - after - 5000 -> raise "Timeout" - end - end - end) - - # Wait for both children to start - receive do - :child1_started -> :ok - after - 5000 -> raise "Timeout" - end - - receive do - :child2_started -> :ok - after - 5000 -> raise "Timeout" - end - - # Signal parent that children have started - send(parent_task.pid, :children_started) - - # Wait for parent to end - receive do - :parent_ended -> :ok - after - 5000 -> raise "Timeout" - end - - # End children in sequence (both after parent) - Process.sleep(50) - send(child1_task.pid, :end_child1) - Process.sleep(20) - send(child2_task.pid, :end_child2) - - Task.await(parent_task) - Task.await(child1_task) - Task.await(child2_task) - - Process.sleep(100) - - transactions = Sentry.Test.pop_sentry_transactions() - - assert length(transactions) == 1, "Expected 1 transaction, got #{length(transactions)}" - [transaction] = transactions - - assert transaction.transaction == "parent_with_multiple_children" - - # Both child spans should be included - assert length(transaction.spans) == 2, - "Expected 2 child spans, got #{length(transaction.spans)}. " <> - "This fails due to the race condition." - - span_ops = Enum.map(transaction.spans, & &1.op) |> Enum.sort() - assert span_ops == ["child_1", "child_2"] - end - - @tag span_storage: true - test "includes child spans created with start_span/end_span (ecto pattern)" do - # This test mimics the exact pattern used by opentelemetry_ecto: - # - Parent span uses with_span (like LiveView mount) - # - Child span uses start_span/end_span directly within the same process - # (like Ecto telemetry handler) - # - # The key difference is that start_span/end_span doesn't nest like with_span, - # so we need to manually attach the parent context. - - put_test_config(environment_name: "test", traces_sample_rate: 1.0) - - Sentry.Test.start_collecting_sentry_reports() - - test_pid = self() - - # This simulates what happens in LiveView + Ecto: - # 1. LiveView mount span starts (via opentelemetry_phoenix telemetry) - # 2. Ecto query runs, which creates a child span via telemetry - # 3. LiveView mount span ends - parent_task = - Task.async(fn -> - Tracer.with_span "LiveView.mount", %{kind: :server} do - # Get the current context to pass to child - parent_ctx = :otel_ctx.get_current() - send(test_pid, {:parent_ctx, parent_ctx}) - - # Wait for child to start - receive do - :child_started -> :ok - after - 5000 -> raise "Timeout" - end - - # Parent ends here, but child may still be running - end - - send(test_pid, :parent_ended) - end) - - # Get parent context - parent_ctx = - receive do - {:parent_ctx, ctx} -> ctx - after - 5000 -> raise "Timeout" - end - - # Simulate Ecto span creation (using start_span/end_span like opentelemetry_ecto does) - child_task = - Task.async(fn -> - # Attach parent context (this is what opentelemetry_process_propagator does) - # opentelemetry_ecto uses OpenTelemetry.Ctx.attach which returns a token - parent_token = OpenTelemetry.Ctx.attach(parent_ctx) - - # Create child span using start_span (not with_span) - # This is exactly what opentelemetry_ecto does - child_span = - OpenTelemetry.Tracer.start_span("Ecto.query", %{ - kind: :client, - attributes: %{ - "db.system" => :postgresql, - "db.statement" => "SELECT * FROM users" - } - }) - - send(test_pid, :child_started) - - # Wait for parent to end first (simulating the race condition) - receive do - :parent_ended -> :ok - after - 5000 -> raise "Timeout" - end - - Process.sleep(50) - - # End the child span (after parent has ended) - # opentelemetry_ecto uses OpenTelemetry.Span.end_span/1, not Tracer.end_span/1 - OpenTelemetry.Span.end_span(child_span) - - # Detach the context token (cleanup) - OpenTelemetry.Ctx.detach(parent_token) - end) - - # Wait for child to start - receive do - :child_started -> :ok - after - 5000 -> raise "Timeout" - end - - # Signal parent to end - send(parent_task.pid, :child_started) - - # Wait for parent to end - receive do - :parent_ended -> :ok - after - 5000 -> raise "Timeout" - end - - # Signal child that parent has ended - send(child_task.pid, :parent_ended) - - Task.await(parent_task) - Task.await(child_task) - - Process.sleep(100) - - transactions = Sentry.Test.pop_sentry_transactions() - - assert length(transactions) == 1, "Expected 1 transaction, got #{length(transactions)}" - [transaction] = transactions - - assert transaction.transaction == "LiveView.mount" - - # The child span should be included even with start_span/end_span pattern - assert length(transaction.spans) == 1, - "Expected 1 child span (Ecto query), got #{length(transaction.spans)}. " <> - "This tests the opentelemetry_ecto pattern." - - [child_span] = transaction.spans - assert child_span.op == "db" - assert child_span.description == "SELECT * FROM users" - end - end end From 4a2ae027bde100619b3b8b9d73fd8809f6830e17 Mon Sep 17 00:00:00 2001 From: Peter Solnica Date: Mon, 1 Dec 2025 15:40:48 +0000 Subject: [PATCH 4/4] Fix formatting --- lib/sentry/opentelemetry/span_storage.ex | 4 +++- test/sentry/logger_handler_test.exs | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/sentry/opentelemetry/span_storage.ex b/lib/sentry/opentelemetry/span_storage.ex index c8e977cb..0f7d6a1e 100644 --- a/lib/sentry/opentelemetry/span_storage.ex +++ b/lib/sentry/opentelemetry/span_storage.ex @@ -42,7 +42,9 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do table_name = Keyword.get(opts, :table_name, default_table_name()) case :ets.lookup(table_name, {:root_span, span_id}) do - [{{:root_span, ^span_id}, _span, _stored_at}] -> true + [{{:root_span, ^span_id}, _span, _stored_at}] -> + true + [] -> case :ets.match_object(table_name, {{:child_span, :_, span_id}, :_, :_}) do [] -> false diff --git a/test/sentry/logger_handler_test.exs b/test/sentry/logger_handler_test.exs index fa10058d..355a2cdd 100644 --- a/test/sentry/logger_handler_test.exs +++ b/test/sentry/logger_handler_test.exs @@ -671,8 +671,7 @@ defmodule Sentry.LoggerHandlerTest do %ArgumentError{ message: ":sync_threshold and :discard_threshold cannot be used together, one of them must be nil" - }, - _}}} = + }, _}}} = :logger.update_handler_config( @handler_name, :config,