diff --git a/poetry.lock b/poetry.lock index 5b048da1..ea617597 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -480,7 +480,7 @@ files = [ {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] -markers = {main = "platform_python_implementation == \"PyPy\" or (extra == \"crewai\" or extra == \"all\") and python_version <= \"3.13\"", test = "platform_python_implementation == \"PyPy\""} +markers = {main = "(extra == \"langchain\" or extra == \"all\") and platform_python_implementation == \"PyPy\" or python_version <= \"3.13\" and (extra == \"langchain\" or extra == \"all\") and (extra == \"crewai\" or extra == \"all\") or python_version <= \"3.13\" and (extra == \"crewai\" or extra == \"all\") and platform_python_implementation != \"PyPy\"", test = "platform_python_implementation == \"PyPy\""} [package.dependencies] pycparser = "*" @@ -585,6 +585,7 @@ files = [ {file = "charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a"}, {file = "charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14"}, ] +markers = {main = "python_version < \"3.13\" and (extra == \"crewai\" or extra == \"all\" or extra == \"langchain\" or extra == \"openai\" or extra == \"otel\") or extra == \"langchain\" or extra == \"all\" or extra == \"openai\" or extra == \"otel\" or python_version <= \"3.13\" and (extra == \"langchain\" or extra == \"all\" or extra == \"openai\" or extra == \"otel\" or extra == \"crewai\")"} [[package]] name = "chromadb" @@ -662,7 +663,7 @@ files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "platform_system == \"Windows\" or extra == \"openai\" or extra == \"all\" or (extra == \"openai\" or extra == \"all\" or extra == \"crewai\") and os_name == \"nt\" and python_version <= \"3.13\" or (extra == \"openai\" or extra == \"all\" or extra == \"crewai\") and sys_platform == \"win32\" and python_version <= \"3.13\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\"", test = "sys_platform == \"win32\""} +markers = {main = "(platform_system == \"Windows\" or os_name == \"nt\" or sys_platform == \"win32\" or extra == \"openai\" or extra == \"all\") and (platform_system == \"Windows\" or extra == \"openai\" or extra == \"all\" or extra == \"crewai\") and (python_version <= \"3.13\" or extra == \"openai\" or extra == \"all\") and (extra == \"crewai\" or extra == \"all\" or extra == \"openai\")", dev = "platform_system == \"Windows\" or sys_platform == \"win32\"", test = "sys_platform == \"win32\""} [[package]] name = "coloredlogs" @@ -949,6 +950,7 @@ files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, ] +markers = {main = "(extra == \"crewai\" or extra == \"all\" or extra == \"openai\") and python_version <= \"3.13\" or extra == \"openai\" or extra == \"all\""} [[package]] name = "docstring-parser" @@ -1254,30 +1256,34 @@ tqdm = ["tqdm"] [[package]] name = "galileo-core" -version = "3.82.1" +version = "4.0.0" description = "Shared schemas and configuration for Galileo's Python packages." optional = false -python-versions = ">=3.10.0" +python-versions = "^3.10.0" groups = ["main", "test"] -files = [ - {file = "galileo_core-3.82.1-py3-none-any.whl", hash = "sha256:3008c3318ab3a5e0c6fc0358befaf56f834388b98ecf918c384a67bafe62098e"}, - {file = "galileo_core-3.82.1.tar.gz", hash = "sha256:7aac99faa78f258a198b455c1c3f9e02e0434711237b5916ade117dc7931c03f"}, -] +files = [] +develop = false [package.dependencies] httpx = ">=0.27.0,<0.29.0" -pydantic = ">=2.6.0,<3.0.0" -pydantic-partial = ">=0.10.1,<0.11.0" -pydantic-settings = ">=2.2.1,<3.0.0" -pyjwt = ">=2.8.0,<3.0.0" -pytest = {version = ">=8.2.1,<9.0.0", optional = true, markers = "extra == \"testing\""} -respx = {version = ">=0.22.0,<0.23.0", optional = true, markers = "extra == \"testing\""} -typing-extensions = ">=4.12.2,<5.0.0" -uvloop = {version = ">=0.21.0,<0.22.0", markers = "sys_platform != \"win32\""} +pydantic = "^2.6.0" +pydantic-partial = "^0.10.1" +pydantic-settings = "^2.2.1" +pyjwt = "^2.8.0" +pytest = {version = "^8.2.1", optional = true} +respx = {version = "^0.22.0", optional = true} +typing-extensions = "^4.12.2" +uvloop = {version = "^0.21.0", markers = "sys_platform != \"win32\""} [package.extras] testing = ["pytest (>=8.2.1,<9.0.0)", "respx (>=0.22.0,<0.23.0)"] +[package.source] +type = "git" +url = "https://github.com/rungalileo/galileo-core.git" +reference = "feature/sc-56110/-sdk-python-use-new-content-schema-in-langchain" +resolved_reference = "3ddc62a21e6ad723c9b985865d4c272172aafc09" + [[package]] name = "google-auth" version = "2.40.3" @@ -1918,6 +1924,7 @@ files = [ {file = "jiter-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:1b28302349dc65703a9e4ead16f163b1c339efffbe1049c30a44b001a2a4fff9"}, {file = "jiter-0.10.0.tar.gz", hash = "sha256:07a7142c38aacc85194391108dc91b5b57093c978a9932bd86a36862759d9500"}, ] +markers = {main = "(extra == \"crewai\" or extra == \"all\" or extra == \"openai\") and python_version <= \"3.13\" or extra == \"openai\" or extra == \"all\""} [[package]] name = "json-repair" @@ -1959,6 +1966,7 @@ files = [ {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, ] +markers = {main = "extra == \"langchain\" or extra == \"all\""} [package.dependencies] jsonpointer = ">=1.9" @@ -1994,6 +2002,7 @@ files = [ {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, ] +markers = {main = "extra == \"langchain\" or extra == \"all\""} [[package]] name = "jsonref" @@ -2023,7 +2032,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" -jsonschema-specifications = ">=2023.03.6" +jsonschema-specifications = ">=2023.3.6" referencing = ">=0.28.4" rpds-py = ">=0.7.1" @@ -2061,7 +2070,7 @@ files = [ ] [package.dependencies] -certifi = ">=14.05.14" +certifi = ">=14.5.14" durationpy = ">=0.7" google-auth = ">=1.0.1" oauthlib = ">=3.2.2" @@ -2123,6 +2132,7 @@ files = [ {file = "langchain_core-1.2.7-py3-none-any.whl", hash = "sha256:452f4fef7a3d883357b22600788d37e3d8854ef29da345b7ac7099f33c31828b"}, {file = "langchain_core-1.2.7.tar.gz", hash = "sha256:e1460639f96c352b4a41c375f25aeb8d16ffc1769499fb1c20503aad59305ced"}, ] +markers = {main = "extra == \"langchain\" or extra == \"all\""} [package.dependencies] jsonpatch = ">=1.33.0,<2.0.0" @@ -2217,6 +2227,7 @@ files = [ {file = "langsmith-0.4.14-py3-none-any.whl", hash = "sha256:b6d070ac425196947d2a98126fb0e35f3b8c001a2e6e5b7049dd1c56f0767d0b"}, {file = "langsmith-0.4.14.tar.gz", hash = "sha256:4d29c7a9c85b20ba813ab9c855407bccdf5eb4f397f512ffa89959b2a2cb83ed"}, ] +markers = {main = "extra == \"langchain\" or extra == \"all\""} [package.dependencies] httpx = ">=0.23.0,<1" @@ -2946,6 +2957,7 @@ files = [ {file = "openai-1.95.1-py3-none-any.whl", hash = "sha256:8bbdfeceef231b1ddfabbc232b179d79f8b849aab5a7da131178f8d10e0f162f"}, {file = "openai-1.95.1.tar.gz", hash = "sha256:f089b605282e2a2b6776090b4b46563ac1da77f56402a222597d591e2dcc1086"}, ] +markers = {main = "(extra == \"crewai\" or extra == \"all\" or extra == \"openai\") and python_version <= \"3.13\" or extra == \"openai\" or extra == \"all\""} [package.dependencies] anyio = ">=3.5.0,<5" @@ -3272,7 +3284,7 @@ files = [ {file = "orjson-3.11.2-cp39-cp39-win_amd64.whl", hash = "sha256:c9ec0cc0d4308cad1e38a1ee23b64567e2ff364c2a3fe3d6cbc69cf911c45712"}, {file = "orjson-3.11.2.tar.gz", hash = "sha256:91bdcf5e69a8fd8e8bdb3de32b31ff01d2bd60c1e8d5fe7d5afabdcf19920309"}, ] -markers = {main = "platform_python_implementation != \"PyPy\" or extra == \"langchain\" or extra == \"all\" or (extra == \"langchain\" or extra == \"all\" or extra == \"crewai\") and python_version <= \"3.13\"", test = "platform_python_implementation != \"PyPy\""} +markers = {main = "extra == \"langchain\" or extra == \"all\" or (extra == \"crewai\" or extra == \"all\" or extra == \"langchain\") and python_version <= \"3.13\"", test = "platform_python_implementation != \"PyPy\""} [[package]] name = "ormsgpack" @@ -3357,6 +3369,7 @@ files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +markers = {main = "python_version < \"3.13\" and (extra == \"crewai\" or extra == \"all\" or extra == \"langchain\" or extra == \"openai\") or extra == \"langchain\" or extra == \"all\" or extra == \"openai\" or python_version <= \"3.13\" and (extra == \"langchain\" or extra == \"all\" or extra == \"openai\" or extra == \"crewai\")"} [[package]] name = "parso" @@ -4101,7 +4114,7 @@ files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] -markers = {main = "platform_python_implementation == \"PyPy\" or (extra == \"crewai\" or extra == \"all\") and python_version <= \"3.13\"", test = "platform_python_implementation == \"PyPy\""} +markers = {main = "(extra == \"langchain\" or extra == \"all\") and platform_python_implementation == \"PyPy\" or python_version <= \"3.13\" and (extra == \"langchain\" or extra == \"all\") and (extra == \"crewai\" or extra == \"all\") or python_version <= \"3.13\" and (extra == \"crewai\" or extra == \"all\") and platform_python_implementation != \"PyPy\"", test = "platform_python_implementation == \"PyPy\""} [[package]] name = "pydantic" @@ -4690,6 +4703,7 @@ files = [ {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +markers = {main = "(extra == \"crewai\" or extra == \"all\" or extra == \"langchain\") and python_version <= \"3.13\" or extra == \"langchain\" or extra == \"all\""} [[package]] name = "referencing" @@ -4818,6 +4832,7 @@ files = [ {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"}, {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"}, ] +markers = {main = "python_version < \"3.13\" and (extra == \"crewai\" or extra == \"all\" or extra == \"langchain\" or extra == \"openai\" or extra == \"otel\") or extra == \"langchain\" or extra == \"all\" or extra == \"openai\" or extra == \"otel\" or python_version <= \"3.13\" and (extra == \"langchain\" or extra == \"all\" or extra == \"openai\" or extra == \"otel\" or extra == \"crewai\")"} [package.dependencies] certifi = ">=2017.4.17" @@ -4878,6 +4893,7 @@ files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, ] +markers = {main = "extra == \"langchain\" or extra == \"all\""} [package.dependencies] requests = ">=2.0.1,<3.0.0" @@ -5332,6 +5348,7 @@ files = [ {file = "tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138"}, {file = "tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb"}, ] +markers = {main = "(extra == \"crewai\" or extra == \"all\" or extra == \"langchain\") and python_version <= \"3.13\" or extra == \"langchain\" or extra == \"all\""} [package.extras] doc = ["reno", "sphinx"] @@ -5619,6 +5636,7 @@ files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, ] +markers = {main = "(extra == \"crewai\" or extra == \"all\" or extra == \"openai\") and python_version <= \"3.13\" or extra == \"openai\" or extra == \"all\""} [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} @@ -5745,11 +5763,11 @@ description = "HTTP library with thread-safe connection pooling, file post, and optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" groups = ["main", "test"] -markers = "platform_python_implementation == \"PyPy\"" files = [ {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, ] +markers = {main = "platform_python_implementation == \"PyPy\" and (extra == \"langchain\" or extra == \"all\" or extra == \"openai\" or extra == \"otel\") or platform_python_implementation == \"PyPy\" and (extra == \"crewai\" or extra == \"all\" or extra == \"langchain\" or extra == \"openai\" or extra == \"otel\") and python_version <= \"3.13\"", test = "platform_python_implementation == \"PyPy\""} [package.extras] brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] @@ -5763,11 +5781,11 @@ description = "HTTP library with thread-safe connection pooling, file post, and optional = false python-versions = ">=3.9" groups = ["main", "test"] -markers = "platform_python_implementation != \"PyPy\"" files = [ {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"}, {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"}, ] +markers = {main = "platform_python_implementation != \"PyPy\" and (extra == \"langchain\" or extra == \"all\" or extra == \"openai\" or extra == \"otel\") or platform_python_implementation != \"PyPy\" and (extra == \"crewai\" or extra == \"all\" or extra == \"langchain\" or extra == \"openai\" or extra == \"otel\") and python_version <= \"3.13\"", test = "platform_python_implementation != \"PyPy\""} [package.extras] brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] @@ -5806,6 +5824,7 @@ files = [ {file = "uuid_utils-0.13.0-pp311-pypy311_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:b7ccaa20e24c5f60f41a69ef571ed820737f9b0ade4cbeef56aaa8f80f5aa475"}, {file = "uuid_utils-0.13.0.tar.gz", hash = "sha256:4c17df6427a9e23a4cd7fb9ee1efb53b8abb078660b9bdb2524ca8595022dfe1"}, ] +markers = {main = "extra == \"langchain\" or extra == \"all\""} [[package]] name = "uv" @@ -6717,6 +6736,7 @@ files = [ {file = "zstandard-0.23.0-cp39-cp39-win_amd64.whl", hash = "sha256:f8346bfa098532bc1fb6c7ef06783e969d87a99dd1d2a5a18a892c1d7a643c58"}, {file = "zstandard-0.23.0.tar.gz", hash = "sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09"}, ] +markers = {main = "extra == \"langchain\" or extra == \"all\""} [package.dependencies] cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\""} @@ -6735,4 +6755,4 @@ otel = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"] [metadata] lock-version = "2.1" python-versions = "^3.10,<3.15" -content-hash = "c1ef20ce1593e132897a3eaf576eef4a49b18e78469dcd4e84b4b83cffea889e" +content-hash = "3000223c0edd288f094855dc5cb7d3911edad2b6b38ce675098b4813f150214b" diff --git a/pyproject.toml b/pyproject.toml index b68d2a15..2ad137c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ langchain-core = { version = ">=0.3.68", optional = true } langchain = { version = ">=0.0.0,<2.0.0", optional = true } openai = { version = "<1.96.0", optional = true } openai-agents = { version = "<0.2.1", optional = true } -galileo-core = "^3.82.1" +galileo-core = {git = "https://github.com/rungalileo/galileo-core.git", branch = "feature/sc-56110/-sdk-python-use-new-content-schema-in-langchain"} starlette = { version = ">=0.27.0", optional = true } backoff = "^2.2.1" crewai = { version = ">=0.152.0,<2.0.0", optional = true, python = ">=3.10,<3.14" } @@ -38,7 +38,7 @@ pytest-xdist = "^3.7.0" pytest-socket = "^0.7" pytest-asyncio = "^1.0.0" requests-mock = "^1.11.0" -galileo-core = { extras = ["testing"], version = "^3.82.1" } +galileo-core = {git = "https://github.com/rungalileo/galileo-core.git", branch = "feature/sc-56110/-sdk-python-use-new-content-schema-in-langchain", extras = ["testing"]} pytest-env = "^1.1.5" langchain-core = ">=0.3.68" diff --git a/src/galileo/constants/routes.py b/src/galileo/constants/routes.py index 75ceb2e9..5e7d64b1 100644 --- a/src/galileo/constants/routes.py +++ b/src/galileo/constants/routes.py @@ -23,3 +23,5 @@ class Routes(str, Enum): sessions = "/v2/projects/{project_id}/sessions" sessions_search = "/v2/projects/{project_id}/sessions/search" + + ingest_traces = "/ingest/traces/{project_id}" diff --git a/src/galileo/logger/logger.py b/src/galileo/logger/logger.py index c22a33e5..b503eaf2 100644 --- a/src/galileo/logger/logger.py +++ b/src/galileo/logger/logger.py @@ -4,6 +4,7 @@ import inspect import json import logging +import os import time import uuid from datetime import datetime @@ -30,7 +31,7 @@ TracesIngestRequest, TraceUpdateRequest, ) -from galileo.traces import Traces +from galileo.traces import IngestTraces, Traces from galileo.utils.decorators import ( async_warn_catch_exception, nop_async, @@ -153,6 +154,7 @@ class GalileoLogger(TracesLogger): _logger = logging.getLogger("galileo.logger") _traces_client: Optional["Traces"] = None + _ingest_client: Optional["IngestTraces"] = None _task_handler: ThreadPoolTaskHandler _trace_completion_submitted: bool @@ -305,6 +307,11 @@ def __init__( self._traces_client = Traces(project_id=self.project_id, log_stream_id=self.log_stream_id) elif self.experiment_id: self._traces_client = Traces(project_id=self.project_id, experiment_id=self.experiment_id) + + if os.environ.get("GALILEO_INGEST_URL"): + self._ingest_client = IngestTraces( + project_id=self.project_id, log_stream_id=self.log_stream_id, experiment_id=self.experiment_id + ) else: # ingestion_hook path: Traces client not created eagerly. # If the user later calls ingest_traces(), it will be created lazily. @@ -474,6 +481,8 @@ def _ingest_trace_streaming(self, trace: Trace, is_complete: bool = False) -> No ) @retry_on_transient_http_error async def ingest_traces_with_backoff(request: Any) -> None: + if self._ingest_client: + return await self._ingest_client.ingest_traces(request) return await self._traces_client.ingest_traces(request) self._task_handler.submit_task( @@ -1837,6 +1846,8 @@ async def _flush_batch(self) -> list[Trace]: await self._ingestion_hook(traces_ingest_request) else: self._ingestion_hook(traces_ingest_request) + elif self._ingest_client: + await self._ingest_client.ingest_traces(traces_ingest_request) else: await self._traces_client.ingest_traces(traces_ingest_request) diff --git a/src/galileo/traces.py b/src/galileo/traces.py index bd9e2f81..bc0e5ef4 100644 --- a/src/galileo/traces.py +++ b/src/galileo/traces.py @@ -1,10 +1,14 @@ import logging +import os from typing import Any, Optional from uuid import UUID +import httpx + from galileo.config import GalileoPythonConfig from galileo.constants.routes import Routes from galileo.schema.trace import ( + LoggingMethod, LogRecordsSearchRequest, SessionCreateRequest, SpansIngestRequest, @@ -19,6 +23,8 @@ _logger = logging.getLogger(__name__) +INGEST_SERVICE_TIMEOUT_SECONDS = 120.0 + class Traces: """ @@ -159,3 +165,62 @@ async def get_span(self, span_id: str) -> dict[str, str]: return await self._make_async_request( RequestMethod.GET, endpoint=Routes.span.format(project_id=self.project_id, span_id=span_id) ) + + +class IngestTraces: + """Client for the orbit ingest service (``/ingest/traces/:project_id``). + + The ingest service accepts multimodal content blocks natively and + runs on a separate URL from the main Galileo API. + + The service URL is resolved from ``GALILEO_INGEST_URL`` env var. + If not set, it falls back to ``{api_url}/ingest/traces/{project_id}``. + """ + + def __init__(self, project_id: str, log_stream_id: Optional[str] = None, experiment_id: Optional[str] = None): + self.config = GalileoPythonConfig.get() + self.project_id = project_id + self.log_stream_id = log_stream_id + self.experiment_id = experiment_id + + if self.log_stream_id is None and self.experiment_id is None: + raise ValueError("log_stream_id or experiment_id must be set") + + def _get_ingest_base_url(self) -> str: + explicit = os.environ.get("GALILEO_INGEST_URL") + if explicit: + return explicit.rstrip("/") + return str(self.config.api_url or self.config.console_url).rstrip("/") + + def _get_auth_headers(self) -> dict[str, str]: + headers: dict[str, str] = {"Content-Type": "application/json", "X-Galileo-SDK": get_sdk_header()} + if self.config.api_key: + headers["Galileo-API-Key"] = self.config.api_key.get_secret_value() + elif self.config.jwt_token: + headers["Authorization"] = f"Bearer {self.config.jwt_token.get_secret_value()}" + return headers + + @async_warn_catch_exception(logger=_logger) + async def ingest_traces(self, traces_ingest_request: TracesIngestRequest) -> dict[str, Any]: + if self.experiment_id: + traces_ingest_request.experiment_id = UUID(self.experiment_id) + elif self.log_stream_id: + traces_ingest_request.log_stream_id = UUID(self.log_stream_id) + + traces_ingest_request.logging_method = LoggingMethod.python_client + + base_url = self._get_ingest_base_url() + url = f"{base_url}{Routes.ingest_traces.format(project_id=self.project_id)}" + json_body = traces_ingest_request.model_dump(mode="json") + + _logger.info( + "Sending traces to ingest service", + extra={"url": url, "project_id": self.project_id, "num_traces": len(traces_ingest_request.traces)}, + ) + + async with httpx.AsyncClient( + timeout=httpx.Timeout(INGEST_SERVICE_TIMEOUT_SECONDS, connect=10.0), verify=self.config.ssl_context + ) as client: + response = await client.post(url, json=json_body, headers=self._get_auth_headers()) + response.raise_for_status() + return response.json() diff --git a/src/galileo/utils/serialization.py b/src/galileo/utils/serialization.py index 6828fc76..993851c2 100644 --- a/src/galileo/utils/serialization.py +++ b/src/galileo/utils/serialization.py @@ -14,6 +14,7 @@ from pydantic import BaseModel from galileo.utils.dependencies import is_langchain_available, is_langgraph_available, is_proto_plus_available +from galileo_core.schemas.shared.content_blocks import DataContentBlock, IngestContentBlock, TextContentBlock _logger = logging.getLogger(__name__) @@ -42,10 +43,79 @@ def _serialize_zoned_datetime(v: dt.datetime) -> str: def map_langchain_role(role: str) -> str: - role_map = {"ai": "assistant", "human": "user"} + # Non-chunk types like "system", "tool", "function", "chat" pass through unchanged. + # Chunk classes set type to the class name (e.g. "AIMessageChunk") so we map those too. + role_map = { + "ai": "assistant", + "AIMessageChunk": "assistant", + "human": "user", + "HumanMessageChunk": "user", + "SystemMessageChunk": "system", + "ToolMessageChunk": "tool", + "FunctionMessageChunk": "function", + "ChatMessageChunk": "chat", + } return role_map.get(role, role) +# LangChain multimodal message format mapping. +# See https://python.langchain.com/docs/concepts/multimodality/ +# LangChain uses {"type": "_url", "_url": {"url": "..."}} for media, +# and {"type": "text", "text": "..."} for text segments. +_LANGCHAIN_TYPE_TO_MODALITY = { + "image_url": "image", + "audio_url": "audio", + "video_url": "video", + "document_url": "document", + "input_image": "image", + "input_audio": "audio", +} + + +def _convert_langchain_content_block(block: dict) -> IngestContentBlock: + """Convert a single LangChain content block dict to a Galileo ingest content block. + + LangChain multimodal format (https://python.langchain.com/docs/concepts/multimodality/): + {"type": "text", "text": "hello"} + {"type": "image_url", "image_url": {"url": "https://..."}} + {"type": "input_audio", "input_audio": {"data": "base64...", "format": "wav"}} + + Returns either a TextContentBlock or DataContentBlock instance. + """ + block_type = block.get("type", "") + if block_type == "text" or (not block_type and "text" in block): + return TextContentBlock(text=block.get("text", "")) + modality = _LANGCHAIN_TYPE_TO_MODALITY.get(block_type) + if modality: + nested = block.get(block_type, {}) + if isinstance(nested, dict): + url = nested.get("url", "") + raw_data = nested.get("data", "") + fmt = nested.get("format", "") + else: + url = str(nested) + raw_data = "" + fmt = "" + kwargs: dict[str, Any] = {"type": modality} + if url: + if url.startswith("data:"): + kwargs["base64"] = url + else: + kwargs["url"] = url + elif raw_data: + mime = f"{modality}/{fmt}" if fmt else modality + kwargs["base64"] = f"data:{mime};base64,{raw_data}" + return DataContentBlock(**kwargs) + return TextContentBlock(text=str(block)) + + +def _normalize_multimodal_content(dumped: dict) -> None: + """If ``dumped["content"]`` is a list of dicts, convert each to an ingest content block in-place.""" + content = dumped.get("content") + if isinstance(content, list) and content and isinstance(content[0], dict): + dumped["content"] = [_convert_langchain_content_block(b) for b in content] + + class EventSerializer(JSONEncoder): """Custom JSON encoder to assist in the serialization of a wide range of objects.""" @@ -96,6 +166,14 @@ def default(self, obj: Any) -> Any: return self.default(obj.message) if isinstance(obj, LLMResult): return self.default(obj.generations[0]) + # LangChain message type multimodal audit (all 12 types accounted for): + # Branch 1 (AIMessageChunk, AIMessage): explicit — tool_calls + multimodal + # Branch 2 (ToolMessage, ToolMessageChunk via inheritance): explicit — status + multimodal + # Branch 3 (BaseMessage catch-all): HumanMessage, HumanMessageChunk, + # SystemMessage, SystemMessageChunk, ChatMessage, ChatMessageChunk, + # FunctionMessage (deprecated), FunctionMessageChunk (deprecated) + # _normalize_multimodal_content() is called in every branch so list[dict] + # content is converted to IngestContentBlock for all message types. if isinstance(obj, (AIMessageChunk, AIMessage)): # Map the `type` to `role`. if hasattr(obj, "model_dump"): @@ -105,12 +183,7 @@ def default(self, obj: Any) -> Any: else: # Fallback to using the dict method if model_dump is not available i.e pydantic v1 dumped = obj.dict(include={"content", "type", "additional_kwargs", "tool_calls"}) - content = dumped.get("content") - if isinstance(content, list): - # Responses API returns content as a list of dicts - # Convert list content to string format for consistency - if content and isinstance(content[0], dict): - dumped["content"] = content[0].get("text", "") + _normalize_multimodal_content(dumped) dumped["role"] = map_langchain_role(dumped.pop("type")) additional_kwargs = dumped.pop("additional_kwargs", {}) # Check both direct attribute and additional_kwargs for tool_calls @@ -156,6 +229,7 @@ def default(self, obj: Any) -> Any: else: # Fallback to using the dict method if model_dump is not available i.e pydantic v1 dumped = obj.dict(include={"content", "type", "status", "tool_call_id"}) + _normalize_multimodal_content(dumped) dumped["role"] = map_langchain_role(dumped.pop("type")) return dumped if isinstance(obj, BaseMessage): @@ -165,6 +239,7 @@ def default(self, obj: Any) -> Any: else: # Fallback to using the dict method if model_dump is not available i.e pydantic v1 dumped = obj.dict(include={"content", "type"}) + _normalize_multimodal_content(dumped) dumped["role"] = map_langchain_role(dumped.pop("type")) return dumped @@ -212,6 +287,9 @@ def default(self, obj: Any) -> Any: return f"<{obj.__name__}>" return f"<{obj.__name__}>" + if isinstance(obj, (TextContentBlock, DataContentBlock)): + return obj.model_dump(mode="json", exclude_none=True) + if isinstance(obj, BaseModel): if hasattr(obj, "model_dump"): return self.default( diff --git a/tests/test_langchain.py b/tests/test_langchain.py index 31ebac2f..89ffb016 100644 --- a/tests/test_langchain.py +++ b/tests/test_langchain.py @@ -230,6 +230,44 @@ def test_on_chat_model_start(self, callback: GalileoCallback) -> None: assert input_data[1]["role"] == "user" assert input_data[2]["role"] == "assistant" + def test_on_chat_model_start_multimodal(self, callback: GalileoCallback) -> None: + """Test that multimodal HumanMessage produces structured content blocks, not stringified JSON.""" + parent_id = uuid.uuid4() + run_id = uuid.uuid4() + + callback.on_chain_start(serialized={}, inputs={"query": "test"}, run_id=parent_id) + + # Given: a HumanMessage with an image_url content block (LangChain multimodal format) + multimodal_message = HumanMessage( + content=[ + {"type": "text", "text": "What is in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}, + ] + ) + + # When: on_chat_model_start processes the multimodal message + callback.on_chat_model_start( + serialized={}, + messages=[[multimodal_message]], + run_id=run_id, + parent_run_id=parent_id, + invocation_params={"model": "gpt-4o"}, + ) + + # Then: the input content is a list of structured content blocks, not a flat string + node = callback._handler.get_node(run_id) + assert node is not None + input_data = node.span_params["input"] + assert isinstance(input_data, list) + assert len(input_data) == 1 + content = input_data[0]["content"] + assert isinstance(content, list), "Multimodal content should be a list of blocks, not a string" + assert len(content) == 2 + assert content[0]["type"] == "text" + assert content[0]["text"] == "What is in this image?" + assert content[1]["type"] == "image" + assert content[1]["url"] == "https://example.com/cat.png" + def test_on_chat_model_start_end_with_tools(self, callback: GalileoCallback, galileo_logger: GalileoLogger) -> None: """Test chat model start and end callbacks with tools""" run_id = uuid.uuid4() @@ -923,11 +961,11 @@ def test_ai_message_with_list_content(self, callback: GalileoCallback, galileo_l assert node is not None assert node.node_type == "chat" - # Check that content was properly converted from list to string + # Check that content was properly converted to structured content blocks input_data = node.span_params["input"] assert isinstance(input_data, list) assert len(input_data) == 1 - assert input_data[0]["content"] == "This is a response from the Responses API" + assert input_data[0]["content"] == [{"type": "text", "text": "This is a response from the Responses API"}] assert input_data[0]["role"] == "assistant" def test_ai_message_with_reasoning(self, callback: GalileoCallback, galileo_logger: GalileoLogger) -> None: diff --git a/tests/test_langchain_async.py b/tests/test_langchain_async.py index ed1e5100..13558afc 100644 --- a/tests/test_langchain_async.py +++ b/tests/test_langchain_async.py @@ -900,11 +900,11 @@ async def test_ai_message_with_list_content( assert node is not None assert node.node_type == "chat" - # Check that content was properly converted from list to string + # Check that content was properly converted to structured content blocks input_data = node.span_params["input"] assert isinstance(input_data, list) assert len(input_data) == 1 - assert input_data[0]["content"] == "This is a response from the Responses API" + assert input_data[0]["content"] == [{"type": "text", "text": "This is a response from the Responses API"}] assert input_data[0]["role"] == "assistant" @mark.asyncio diff --git a/tests/utils/test_serialization.py b/tests/utils/test_serialization.py index 21e75465..7d9f8a5e 100644 --- a/tests/utils/test_serialization.py +++ b/tests/utils/test_serialization.py @@ -11,10 +11,26 @@ from unittest.mock import patch import pytest -from langchain_core.messages import AIMessage +from langchain_core.messages import ( + AIMessage, + AIMessageChunk, + ChatMessage, + FunctionMessage, + HumanMessage, + SystemMessage, + ToolMessage, + ToolMessageChunk, +) from pydantic import BaseModel -from galileo.utils.serialization import EventSerializer, convert_to_string_dict, serialize_datetime, serialize_to_str +from galileo.utils.serialization import ( + EventSerializer, + _convert_langchain_content_block, + convert_to_string_dict, + serialize_datetime, + serialize_to_str, +) +from galileo_core.schemas.shared.content_blocks import DataContentBlock, TextContentBlock class TestSerializeDateTime: @@ -783,3 +799,271 @@ def test_proto_plus_nested_message_serialization(self) -> None: # Then: nested message is properly serialized assert result == {"title": "Great Expectations", "author": {"name": "Dickens"}} + + +class TestConvertLangchainContentBlock: + """Test conversion of LangChain content blocks to ingest service format.""" + + def test_text_block(self) -> None: + # Given: a LangChain text content block + block = {"type": "text", "text": "What is in this image?"} + + # When: converting to ingest format + result = _convert_langchain_content_block(block) + + # Then: it returns a TextContentBlock instance + assert isinstance(result, TextContentBlock) + assert result.type == "text" + assert result.text == "What is in this image?" + + def test_image_url_block(self) -> None: + # Given: a LangChain image_url content block + block = {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}} + + # When: converting to ingest format + result = _convert_langchain_content_block(block) + + # Then: it returns a DataContentBlock with modality=image + assert isinstance(result, DataContentBlock) + assert result.type == "image" + assert result.url == "https://example.com/img.png" + + def test_image_url_base64(self) -> None: + # Given: a LangChain image_url with a base64 data URI + block = {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc123"}} + + # When: converting to ingest format + result = _convert_langchain_content_block(block) + + # Then: it returns a DataContentBlock with base64 instead of url + assert isinstance(result, DataContentBlock) + assert result.type == "image" + assert result.base64 == "data:image/png;base64,abc123" + assert result.url is None + + def test_audio_url_block(self) -> None: + # Given: a LangChain audio_url content block + block = {"type": "audio_url", "audio_url": {"url": "https://example.com/audio.mp3"}} + + # When: converting to ingest format + result = _convert_langchain_content_block(block) + + # Then: it returns a DataContentBlock with modality=audio + assert isinstance(result, DataContentBlock) + assert result.type == "audio" + assert result.url == "https://example.com/audio.mp3" + + def test_unknown_block_type_falls_back_to_text(self) -> None: + # Given: an unknown content block type + block = {"type": "custom_thing", "data": "value"} + + # When: converting to ingest format + result = _convert_langchain_content_block(block) + + # Then: it falls back to a TextContentBlock with stringified content + assert isinstance(result, TextContentBlock) + assert result.type == "text" + + +class TestMultimodalContentSerialization: + """Test that multimodal content in LangChain messages is serialized to content blocks.""" + + def test_ai_message_with_text_only_content(self) -> None: + """String content should remain a plain string (backward compat).""" + pytest.importorskip("langchain_core") + + # Given: an AIMessage with string content + message = AIMessage(content="Hello world") + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: content is still a plain string + assert result["content"] == "Hello world" + assert result["role"] == "assistant" + + def test_ai_message_with_multimodal_content(self) -> None: + """List content should be converted to content blocks, not flattened.""" + pytest.importorskip("langchain_core") + + # Given: an AIMessage with list content (text + image) + message = AIMessage( + content=[ + {"type": "text", "text": "Here is the image analysis"}, + {"type": "image_url", "image_url": {"url": "https://example.com/result.png"}}, + ] + ) + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: content is an array of content blocks + assert isinstance(result["content"], list) + assert len(result["content"]) == 2 + assert result["content"][0] == {"type": "text", "text": "Here is the image analysis"} + assert result["content"][1] == {"type": "image", "url": "https://example.com/result.png"} + + def test_human_message_with_multimodal_content(self) -> None: + """BaseMessage subclasses with list content should also produce content blocks.""" + pytest.importorskip("langchain_core") + + # Given: a HumanMessage with list content + message = HumanMessage( + content=[ + {"type": "text", "text": "What is in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/photo.jpg"}}, + ] + ) + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: content is an array of content blocks + assert isinstance(result["content"], list) + assert len(result["content"]) == 2 + assert result["content"][0] == {"type": "text", "text": "What is in this image?"} + assert result["content"][1] == {"type": "image", "url": "https://example.com/photo.jpg"} + assert result["role"] == "user" + + def test_system_message_with_multimodal_content(self) -> None: + pytest.importorskip("langchain_core") + + # Given: a SystemMessage with list content (technically supported by LangChain) + message = SystemMessage( + content=[ + {"type": "text", "text": "You are a vision assistant."}, + {"type": "image_url", "image_url": {"url": "https://example.com/ref.png"}}, + ] + ) + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: content is converted to content blocks via BaseMessage branch + assert isinstance(result["content"], list) + assert result["content"][0] == {"type": "text", "text": "You are a vision assistant."} + assert result["content"][1] == {"type": "image", "url": "https://example.com/ref.png"} + assert result["role"] == "system" + + def test_tool_message_with_multimodal_content(self) -> None: + pytest.importorskip("langchain_core") + + # Given: a ToolMessage with list content (tool returning image results) + message = ToolMessage( + content=[ + {"type": "text", "text": "Generated chart:"}, + {"type": "image_url", "image_url": {"url": "https://example.com/chart.png"}}, + ], + tool_call_id="call_abc123", + ) + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: content is converted to content blocks via ToolMessage branch + assert isinstance(result["content"], list) + assert result["content"][0] == {"type": "text", "text": "Generated chart:"} + assert result["content"][1] == {"type": "image", "url": "https://example.com/chart.png"} + assert result["role"] == "tool" + + def test_tool_message_chunk_with_multimodal_content(self) -> None: + pytest.importorskip("langchain_core") + + # Given: a ToolMessageChunk with list content (inherits from ToolMessage) + message = ToolMessageChunk( + content=[ + {"type": "text", "text": "Partial result:"}, + {"type": "image_url", "image_url": {"url": "https://example.com/partial.png"}}, + ], + tool_call_id="call_xyz789", + ) + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: content is converted via ToolMessage branch (ToolMessageChunk is a subclass) + assert isinstance(result["content"], list) + assert result["content"][0] == {"type": "text", "text": "Partial result:"} + assert result["content"][1] == {"type": "image", "url": "https://example.com/partial.png"} + assert result["role"] == "tool" + + def test_ai_message_chunk_with_multimodal_content(self) -> None: + pytest.importorskip("langchain_core") + + # Given: an AIMessageChunk with list content (streaming variant) + message = AIMessageChunk( + content=[ + {"type": "text", "text": "Here is what I found:"}, + {"type": "image_url", "image_url": {"url": "https://example.com/found.png"}}, + ] + ) + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: content is converted via AIMessageChunk branch + assert isinstance(result["content"], list) + assert result["content"][0] == {"type": "text", "text": "Here is what I found:"} + assert result["content"][1] == {"type": "image", "url": "https://example.com/found.png"} + assert result["role"] == "assistant" + + def test_chat_message_with_multimodal_content(self) -> None: + pytest.importorskip("langchain_core") + + # Given: a ChatMessage with list content (generic role-based message) + message = ChatMessage( + content=[ + {"type": "text", "text": "Custom role message"}, + {"type": "image_url", "image_url": {"url": "https://example.com/custom.png"}}, + ], + role="custom", + ) + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: content is converted via BaseMessage catch-all branch + assert isinstance(result["content"], list) + assert result["content"][0] == {"type": "text", "text": "Custom role message"} + assert result["content"][1] == {"type": "image", "url": "https://example.com/custom.png"} + assert result["role"] == "chat" + + def test_function_message_with_multimodal_content(self) -> None: + pytest.importorskip("langchain_core") + + # Given: a FunctionMessage with list content (deprecated but still supported) + message = FunctionMessage( + content=[ + {"type": "text", "text": "Function output:"}, + {"type": "image_url", "image_url": {"url": "https://example.com/func.png"}}, + ], + name="generate_image", + ) + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: content is converted via BaseMessage catch-all branch + assert isinstance(result["content"], list) + assert result["content"][0] == {"type": "text", "text": "Function output:"} + assert result["content"][1] == {"type": "image", "url": "https://example.com/func.png"} + assert result["role"] == "function" + + def test_audio_content_block(self) -> None: + pytest.importorskip("langchain_core") + + # Given: a HumanMessage with an audio_url content block + message = HumanMessage( + content=[ + {"type": "text", "text": "Transcribe this audio:"}, + {"type": "input_audio", "input_audio": {"data": "base64audiodata", "format": "wav"}}, + ] + ) + + # When: serializing with EventSerializer + result = json.loads(json.dumps(message, cls=EventSerializer)) + + # Then: audio block is converted to a DataContentBlock with modality=audio + assert isinstance(result["content"], list) + assert result["content"][0] == {"type": "text", "text": "Transcribe this audio:"} + assert result["content"][1]["type"] == "audio"