fix(core): Strip inline media from multimodal content before stringification (#19540)

RulaKhaled · web-flow · commit 93e3c30a758c · 2026-03-02T11:05:43.000+02:00
- Fixes a bug where base64 image/audio data in LangChain multimodal messages leaked into gen_ai.input.messages span attributes unredacted - The root cause was normalizeLangChainMessages calling asString() (which JSON.stringifies arrays) on multimodal content before the media stripping pipeline could inspect it, so stripInlineMediaFromMessages never saw structured objects to redact - Adds normalizeContent(), which applies stripInlineMediaFromSingleMessage to the media parts of array content before stringification, matching the [Blob substitute] behavior already working for OpenAI/Anthropic - Closes #19539
diff --git a/packages/core/src/tracing/langchain/utils.ts b/packages/core/src/tracing/langchain/utils.ts
@@ -25,6 +25,7 @@ import {
   GEN_AI_USAGE_OUTPUT_TOKENS_ATTRIBUTE,
   GEN_AI_USAGE_TOTAL_TOKENS_ATTRIBUTE,
 } from '../ai/gen-ai-attributes';
+import { isContentMedia, stripInlineMediaFromSingleMessage } from '../ai/mediaStripping';
 import { truncateGenAiMessages } from '../ai/messageTruncation';
 import { extractSystemInstructions } from '../ai/utils';
 import { LANGCHAIN_ORIGIN, ROLE_MAP } from './constants';
@@ -62,6 +63,38 @@ function asString(v: unknown): string {
   }
 }
 
+/**
+ * Serializes LangChain message content to a string, redacting inline media first.
+ *
+ * Array content: each object part accepted by `isContentMedia` goes through
+ * `stripInlineMediaFromSingleMessage`, then the array is JSON-stringified (falling back
+ * to `String(v)` if that throws). Any other content is delegated to `asString`.
+ *
+ * @example
+ * normalizeContent("Hello") // => "Hello"
+ * normalizeContent([
+ *   { type: "text", text: "What color?" },
+ *   { type: "image_url", image_url: { url: "data:image/png;base64,iVBOR..." } }
+ * ])
+ * // => '[{"type":"text","text":"What color?"},{"type":"image_url","image_url":{"url":"[Blob substitute]"}}]'
+ */
+function normalizeContent(v: unknown): string {
+  if (!Array.isArray(v)) {
+    return asString(v);
+  }
+  try {
+    const sanitizedParts = v.map(item => {
+      if (item && typeof item === 'object' && isContentMedia(item)) {
+        return stripInlineMediaFromSingleMessage(item);
+      }
+      return item;
+    });
+    return JSON.stringify(sanitizedParts);
+  } catch {
+    return String(v);
+  }
+}
+
 /**
  * Normalizes a single role token to our canonical set.
  *
@@ -123,7 +156,7 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<
       const messageType = maybeGetType.call(message);
       return {
         role: normalizeMessageRole(messageType),
-        content: asString(message.content),
+        content: normalizeContent(message.content),
       };
     }
 
@@ -136,7 +169,7 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<
 
       return {
         role: normalizeMessageRole(role),
-        content: asString(message.kwargs?.content),
+        content: normalizeContent(message.kwargs?.content),
       };
     }
 
@@ -145,7 +178,7 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<
       const role = String(message.type).toLowerCase();
       return {
         role: normalizeMessageRole(role),
-        content: asString(message.content),
+        content: normalizeContent(message.content),
       };
     }
 
@@ -154,7 +187,7 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<
     if (message.role) {
       return {
         role: normalizeMessageRole(String(message.role)),
-        content: asString(message.content),
+        content: normalizeContent(message.content),
       };
     }
 
@@ -164,14 +197,14 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<
     if (ctor && ctor !== 'Object') {
       return {
         role: normalizeMessageRole(normalizeRoleNameFromCtor(ctor)),
-        content: asString(message.content),
+        content: normalizeContent(message.content),
       };
     }
 
     // 6) Fallback: treat as user text
     return {
       role: 'user',
-      content: asString(message.content),
+      content: normalizeContent(message.content),
     };
   });
 }
diff --git a/packages/core/test/lib/tracing/langchain-utils.test.ts b/packages/core/test/lib/tracing/langchain-utils.test.ts
@@ -0,0 +1,248 @@
+import { describe, expect, it } from 'vitest';
+import { GEN_AI_INPUT_MESSAGES_ATTRIBUTE } from '../../../src/tracing/ai/gen-ai-attributes';
+import type { LangChainMessage } from '../../../src/tracing/langchain/types';
+import { extractChatModelRequestAttributes, normalizeLangChainMessages } from '../../../src/tracing/langchain/utils';
+
+describe('normalizeLangChainMessages', () => { // covers each message shape below: _getType(), `type`, `role`, serialized `lc`/`kwargs`
+  it('normalizes messages with _getType()', () => { // 'human'/'ai' from _getType() are expected to come out as 'user'/'assistant'
+    const messages = [
+      {
+        _getType: () => 'human',
+        content: 'Hello',
+      },
+      {
+        _getType: () => 'ai',
+        content: 'Hi there!',
+      },
+    ] as unknown as LangChainMessage[];
+
+    const result = normalizeLangChainMessages(messages);
+    expect(result).toEqual([
+      { role: 'user', content: 'Hello' },
+      { role: 'assistant', content: 'Hi there!' },
+    ]);
+  });
+
+  it('normalizes messages with type property', () => { // same expected mapping, driven by a plain `type` field
+    const messages: LangChainMessage[] = [
+      { type: 'human', content: 'Hello' },
+      { type: 'ai', content: 'Hi!' },
+    ];
+
+    const result = normalizeLangChainMessages(messages);
+    expect(result).toEqual([
+      { role: 'user', content: 'Hello' },
+      { role: 'assistant', content: 'Hi!' },
+    ]);
+  });
+
+  it('normalizes messages with role property', () => { // roles already in canonical form are expected to pass through unchanged
+    const messages: LangChainMessage[] = [
+      { role: 'user', content: 'Hello' },
+      { role: 'assistant', content: 'Hi!' },
+    ];
+
+    const result = normalizeLangChainMessages(messages);
+    expect(result).toEqual([
+      { role: 'user', content: 'Hello' },
+      { role: 'assistant', content: 'Hi!' },
+    ]);
+  });
+
+  it('normalizes serialized LangChain format', () => { // content is read from `kwargs.content`; an id ending in 'HumanMessage' is expected to yield 'user'
+    const messages: LangChainMessage[] = [
+      {
+        lc: 1,
+        id: ['langchain_core', 'messages', 'HumanMessage'],
+        kwargs: { content: 'Hello from serialized' },
+      },
+    ];
+
+    const result = normalizeLangChainMessages(messages);
+    expect(result).toEqual([{ role: 'user', content: 'Hello from serialized' }]);
+  });
+
+  describe('multimodal content media stripping', () => { // array content is expected back as a JSON string with media payloads replaced
+    const b64Data = `iVBORw0KGgoAAAANSUhEUgAAAAUA${'A'.repeat(200)}`; // synthetic payload: fixed prefix followed by 200 'A' characters
+    const BLOB_SUBSTITUTE = '[Blob substitute]'; // placeholder the assertions expect where a media payload used to be
+
+    it('strips base64 image_url from multimodal array content via _getType()', () => {
+      const messages = [
+        {
+          _getType: () => 'human',
+          content: [
+            { type: 'text', text: 'What color is in this image?' },
+            { type: 'image_url', image_url: { url: `data:image/png;base64,${b64Data}` } },
+          ],
+        },
+      ] as unknown as LangChainMessage[];
+
+      const result = normalizeLangChainMessages(messages);
+      expect(result).toHaveLength(1);
+      expect(result[0]!.role).toBe('user');
+
+      const parsed = JSON.parse(result[0]!.content); // content must be valid JSON for the structural checks below
+      expect(parsed).toHaveLength(2);
+      expect(parsed[0]).toEqual({ type: 'text', text: 'What color is in this image?' }); // text part untouched
+      expect(parsed[1].image_url.url).toBe(BLOB_SUBSTITUTE);
+      expect(result[0]!.content).not.toContain(b64Data); // the raw payload must not appear anywhere in the string
+    });
+
+    it('strips base64 data from Anthropic-style source blocks', () => { // payload sits under `source.data`
+      const messages = [
+        {
+          _getType: () => 'human',
+          content: [
+            { type: 'text', text: 'Describe this image' },
+            {
+              type: 'image',
+              source: {
+                type: 'base64',
+                media_type: 'image/png',
+                data: b64Data,
+              },
+            },
+          ],
+        },
+      ] as unknown as LangChainMessage[];
+
+      const result = normalizeLangChainMessages(messages);
+      const parsed = JSON.parse(result[0]!.content);
+      expect(parsed[1].source.data).toBe(BLOB_SUBSTITUTE);
+      expect(result[0]!.content).not.toContain(b64Data);
+    });
+
+    it('strips base64 from inline_data (Google GenAI style)', () => { // note: the fixture key is camelCase `inlineData` and the part has no `type` field
+      const messages: LangChainMessage[] = [
+        {
+          type: 'human',
+          content: [
+            { type: 'text', text: 'Describe' },
+            { inlineData: { mimeType: 'image/png', data: b64Data } },
+          ] as unknown as string, // cast suggests `content` is typed as string here; the array form is what is under test
+        },
+      ];
+
+      const result = normalizeLangChainMessages(messages);
+      const parsed = JSON.parse(result[0]!.content);
+      expect(parsed[1].inlineData.data).toBe(BLOB_SUBSTITUTE);
+      expect(result[0]!.content).not.toContain(b64Data);
+    });
+
+    it('strips base64 from input_audio content parts', () => { // payload sits under `input_audio.data`
+      const messages = [
+        {
+          _getType: () => 'human',
+          content: [
+            { type: 'text', text: 'What do you hear?' },
+            { type: 'input_audio', input_audio: { data: b64Data } },
+          ],
+        },
+      ] as unknown as LangChainMessage[];
+
+      const result = normalizeLangChainMessages(messages);
+      const parsed = JSON.parse(result[0]!.content);
+      expect(parsed[1].input_audio.data).toBe(BLOB_SUBSTITUTE);
+      expect(result[0]!.content).not.toContain(b64Data);
+    });
+
+    it('preserves text-only array content without modification', () => { // no media parts: the parsed output must equal the input parts
+      const messages = [
+        {
+          _getType: () => 'human',
+          content: [
+            { type: 'text', text: 'First part' },
+            { type: 'text', text: 'Second part' },
+          ],
+        },
+      ] as unknown as LangChainMessage[];
+
+      const result = normalizeLangChainMessages(messages);
+      const parsed = JSON.parse(result[0]!.content);
+      expect(parsed).toEqual([
+        { type: 'text', text: 'First part' },
+        { type: 'text', text: 'Second part' },
+      ]);
+    });
+
+    it('strips media from serialized LangChain format with array content', () => { // array content supplied via `kwargs.content`
+      const messages: LangChainMessage[] = [
+        {
+          lc: 1,
+          id: ['langchain_core', 'messages', 'HumanMessage'],
+          kwargs: {
+            content: [
+              { type: 'text', text: 'Describe this' },
+              { type: 'image_url', image_url: { url: `data:image/png;base64,${b64Data}` } },
+            ] as unknown as string,
+          },
+        },
+      ];
+
+      const result = normalizeLangChainMessages(messages);
+      const parsed = JSON.parse(result[0]!.content);
+      expect(parsed[1].image_url.url).toBe(BLOB_SUBSTITUTE);
+      expect(result[0]!.content).not.toContain(b64Data);
+    });
+
+    it('strips media from messages with role property and array content', () => { // array content on a message identified by `role`
+      const messages: LangChainMessage[] = [
+        {
+          role: 'user',
+          content: [
+            { type: 'text', text: 'Look at this' },
+            { type: 'image_url', image_url: { url: `data:image/jpeg;base64,${b64Data}` } },
+          ] as unknown as string,
+        },
+      ];
+
+      const result = normalizeLangChainMessages(messages);
+      const parsed = JSON.parse(result[0]!.content);
+      expect(parsed[1].image_url.url).toBe(BLOB_SUBSTITUTE);
+      expect(result[0]!.content).not.toContain(b64Data);
+    });
+
+    it('strips media from messages with type property and array content', () => { // array content on a message identified by `type`
+      const messages: LangChainMessage[] = [
+        {
+          type: 'human',
+          content: [
+            { type: 'text', text: 'Check this' },
+            { type: 'image_url', image_url: { url: `data:image/png;base64,${b64Data}` } },
+          ] as unknown as string,
+        },
+      ];
+
+      const result = normalizeLangChainMessages(messages);
+      const parsed = JSON.parse(result[0]!.content);
+      expect(parsed[1].image_url.url).toBe(BLOB_SUBSTITUTE); // unlike the sibling tests, this one only checks the parsed URL field
+    });
+  });
+});
+
+describe('extractChatModelRequestAttributes with multimodal content', () => {
+  const b64Data = `iVBORw0KGgoAAAANSUhEUgAAAAUA${'A'.repeat(200)}`;
+
+  it('strips base64 from input messages attribute', () => {
+    // A single human message mixing a text part with an inline base64 PNG data URL.
+    const humanMessage = {
+      _getType: () => 'human',
+      content: [
+        { type: 'text', text: 'What is in this image?' },
+        { type: 'image_url', image_url: { url: `data:image/png;base64,${b64Data}` } },
+      ],
+    } as unknown as LangChainMessage;
+
+    const modelDescriptor = { id: ['langchain', 'chat_models', 'openai'], name: 'ChatOpenAI' };
+    const messageBatches: LangChainMessage[][] = [[humanMessage]];
+
+    const requestAttributes = extractChatModelRequestAttributes(modelDescriptor, messageBatches, true);
+    const serializedInput = requestAttributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE] as string | undefined;
+
+    // The raw payload must be gone, while the placeholder and the text part remain.
+    expect(serializedInput).toBeDefined();
+    expect(serializedInput).not.toContain(b64Data);
+    expect(serializedInput).toContain('[Blob substitute]');
+    expect(serializedInput).toContain('What is in this image?');
+  });
+});